Adding upstream version 5.10.209.upstream/5.10.209 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 10:05:51 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 10:05:51 +0000
commit: 5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch)
tree: a94efe259b9009378be6d90eb30d2b019d95c194 /drivers/net/ethernet/intel/ice
parent: Initial commit. (diff)
download: linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.tar.xz
linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.zip
58 files changed, 58491 insertions, 0 deletions
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
new file mode 100644
index 000000000..6da4f43f2
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018, Intel Corporation.
+
+#
+# Makefile for the Intel(R) Ethernet Connection E800 Series Linux Driver
+#
+
+obj-$(CONFIG_ICE) += ice.o
+
+ice-y := ice_main.o	\
+	 ice_controlq.o	\
+	 ice_common.o	\
+	 ice_nvm.o	\
+	 ice_switch.o	\
+	 ice_sched.o	\
+	 ice_base.o	\
+	 ice_lib.o	\
+	 ice_txrx_lib.o	\
+	 ice_txrx.o	\
+	 ice_fltr.o	\
+	 ice_fdir.o	\
+	 ice_ethtool_fdir.o \
+	 ice_flex_pipe.o \
+	 ice_flow.o	\
+	 ice_devlink.o	\
+	 ice_fw_update.o \
+	 ice_ethtool.o
+ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o
+ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o
+ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o
+ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
new file mode 100644
index 000000000..7794703c1
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -0,0 +1,611 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_H_
+#define _ICE_H_
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/firmware.h>
+#include <linux/netdevice.h>
+#include <linux/compiler.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/cpumask.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_vlan.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
+#include <linux/aer.h>
+#include <linux/interrupt.h>
+#include <linux/ethtool.h>
+#include <linux/timer.h>
+#include <linux/delay.h>
+#include <linux/bitmap.h>
+#include <linux/log2.h>
+#include <linux/ip.h>
+#include <linux/sctp.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_sched.h>
+#include <linux/if_bridge.h>
+#include <linux/ctype.h>
+#include <linux/bpf.h>
+#include <linux/avf/virtchnl.h>
+#include <linux/cpu_rmap.h>
+#include <net/devlink.h>
+#include <net/ipv6.h>
+#include <net/xdp_sock.h>
+#include <net/geneve.h>
+#include <net/gre.h>
+#include <net/udp_tunnel.h>
+#include <net/vxlan.h>
+#include "ice_devids.h"
+#include "ice_type.h"
+#include "ice_txrx.h"
+#include "ice_dcb.h"
+#include "ice_switch.h"
+#include "ice_common.h"
+#include "ice_sched.h"
+#include "ice_virtchnl_pf.h"
+#include "ice_sriov.h"
+#include "ice_fdir.h"
+#include "ice_xsk.h"
+#include "ice_arfs.h"
+
+#define ICE_BAR0		0
+#define ICE_REQ_DESC_MULTIPLE	32
+#define ICE_MIN_NUM_DESC	64
+#define ICE_MAX_NUM_DESC	8160
+#define ICE_DFLT_MIN_RX_DESC	512
+#define ICE_DFLT_NUM_TX_DESC	256
+#define ICE_DFLT_NUM_RX_DESC	2048
+
+#define ICE_DFLT_TRAFFIC_CLASS	BIT(0)
+#define ICE_INT_NAME_STR_LEN	(IFNAMSIZ + 16)
+#define ICE_AQ_LEN		64
+#define ICE_MBXSQ_LEN		64
+#define ICE_MIN_LAN_TXRX_MSIX	1
+#define ICE_MIN_LAN_OICR_MSIX	1
+#define ICE_MIN_MSIX		(ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX)
+#define ICE_FDIR_MSIX		1
+#define ICE_NO_VSI		0xffff
+#define ICE_VSI_MAP_CONTIG	0
+#define ICE_VSI_MAP_SCATTER	1
+#define ICE_MAX_SCATTER_TXQS	16
+#define ICE_MAX_SCATTER_RXQS	16
+#define ICE_Q_WAIT_RETRY_LIMIT	10
+#define ICE_Q_WAIT_MAX_RETRY	(5 * ICE_Q_WAIT_RETRY_LIMIT)
+#define ICE_MAX_LG_RSS_QS	256
+#define ICE_RES_VALID_BIT	0x8000
+#define ICE_RES_MISC_VEC_ID	(ICE_RES_VALID_BIT - 1)
+#define ICE_INVAL_Q_INDEX	0xffff
+#define ICE_INVAL_VFID		256
+
+#define ICE_MAX_RESET_WAIT		20
+
+#define ICE_VSIQF_HKEY_ARRAY_SIZE	((VSIQF_HKEY_MAX_INDEX + 1) *	4)
+
+#define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
+
+#define ICE_MAX_MTU	(ICE_AQ_SET_MAC_FRAME_SIZE_MAX - ICE_ETH_PKT_HDR_PAD)
+
+#define ICE_UP_TABLE_TRANSLATE(val, i) \
+		(((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \
+		  ICE_AQ_VSI_UP_TABLE_UP##i##_M)
+
+#define ICE_TX_DESC(R, i) (&(((struct ice_tx_desc *)((R)->desc))[i]))
+#define ICE_RX_DESC(R, i) (&(((union ice_32b_rx_flex_desc *)((R)->desc))[i]))
+#define ICE_TX_CTX_DESC(R, i) (&(((struct ice_tx_ctx_desc *)((R)->desc))[i]))
+#define ICE_TX_FDIRDESC(R, i) (&(((struct ice_fltr_desc *)((R)->desc))[i]))
+
+/* Macro for each VSI in a PF */
+#define ice_for_each_vsi(pf, i) \
+	for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++)
+
+/* Macros for each Tx/Rx ring in a VSI */
+#define ice_for_each_txq(vsi, i) \
+	for ((i) = 0; (i) < (vsi)->num_txq; (i)++)
+
+#define ice_for_each_rxq(vsi, i) \
+	for ((i) = 0; (i) < (vsi)->num_rxq; (i)++)
+
+/* Macros for each allocated Tx/Rx ring whether used or not in a VSI */
+#define ice_for_each_alloc_txq(vsi, i) \
+	for ((i) = 0; (i) < (vsi)->alloc_txq; (i)++)
+
+#define ice_for_each_alloc_rxq(vsi, i) \
+	for ((i) = 0; (i) < (vsi)->alloc_rxq; (i)++)
+
+#define ice_for_each_q_vector(vsi, i) \
+	for ((i) = 0; (i) < (vsi)->num_q_vectors; (i)++)
+
+#define ICE_UCAST_PROMISC_BITS (ICE_PROMISC_UCAST_TX | ICE_PROMISC_MCAST_TX | \
+				ICE_PROMISC_UCAST_RX | ICE_PROMISC_MCAST_RX)
+
+#define ICE_UCAST_VLAN_PROMISC_BITS (ICE_PROMISC_UCAST_TX | \
+				     ICE_PROMISC_MCAST_TX | \
+				     ICE_PROMISC_UCAST_RX | \
+				     ICE_PROMISC_MCAST_RX | \
+				     ICE_PROMISC_VLAN_TX  | \
+				     ICE_PROMISC_VLAN_RX)
+
+#define ICE_MCAST_PROMISC_BITS (ICE_PROMISC_MCAST_TX | ICE_PROMISC_MCAST_RX)
+
+#define ICE_MCAST_VLAN_PROMISC_BITS (ICE_PROMISC_MCAST_TX | \
+				     ICE_PROMISC_MCAST_RX | \
+				     ICE_PROMISC_VLAN_TX  | \
+				     ICE_PROMISC_VLAN_RX)
+
+#define ice_pf_to_dev(pf) (&((pf)->pdev->dev))
+
+struct ice_txq_meta {
+	u32 q_teid;	/* Tx-scheduler element identifier */
+	u16 q_id;	/* Entry in VSI's txq_map bitmap */
+	u16 q_handle;	/* Relative index of Tx queue within TC */
+	u16 vsi_idx;	/* VSI index that Tx queue belongs to */
+	u8 tc;		/* TC number that Tx queue belongs to */
+};
+
+struct ice_tc_info {
+	u16 qoffset;
+	u16 qcount_tx;
+	u16 qcount_rx;
+	u8 netdev_tc;
+};
+
+struct ice_tc_cfg {
+	u8 numtc; /* Total number of enabled TCs */
+	u8 ena_tc; /* Tx map */
+	struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS];
+};
+
+struct ice_res_tracker {
+	u16 num_entries;
+	u16 end;
+	u16 list[1];
+};
+
+struct ice_qs_cfg {
+	struct mutex *qs_mutex;  /* will be assigned to &pf->avail_q_mutex */
+	unsigned long *pf_map;
+	unsigned long pf_map_size;
+	unsigned int q_count;
+	unsigned int scatter_count;
+	u16 *vsi_map;
+	u16 vsi_map_offset;
+	u8 mapping_mode;
+};
+
+struct ice_sw {
+	struct ice_pf *pf;
+	u16 sw_id;		/* switch ID for this switch */
+	u16 bridge_mode;	/* VEB/VEPA/Port Virtualizer */
+	struct ice_vsi *dflt_vsi;	/* default VSI for this switch */
+	u8 dflt_vsi_ena:1;	/* true if above dflt_vsi is enabled */
+};
+
+enum ice_state {
+	__ICE_TESTING,
+	__ICE_DOWN,
+	__ICE_NEEDS_RESTART,
+	__ICE_PREPARED_FOR_RESET,	/* set by driver when prepared */
+	__ICE_RESET_OICR_RECV,		/* set by driver after rcv reset OICR */
+	__ICE_PFR_REQ,			/* set by driver and peers */
+	__ICE_CORER_REQ,		/* set by driver and peers */
+	__ICE_GLOBR_REQ,		/* set by driver and peers */
+	__ICE_CORER_RECV,		/* set by OICR handler */
+	__ICE_GLOBR_RECV,		/* set by OICR handler */
+	__ICE_EMPR_RECV,		/* set by OICR handler */
+	__ICE_SUSPENDED,		/* set on module remove path */
+	__ICE_RESET_FAILED,		/* set by reset/rebuild */
+	/* When checking for the PF to be in a nominal operating state, the
+	 * bits that are grouped at the beginning of the list need to be
+	 * checked. Bits occurring before __ICE_STATE_NOMINAL_CHECK_BITS will
+	 * be checked. If you need to add a bit into consideration for nominal
+	 * operating state, it must be added before
+	 * __ICE_STATE_NOMINAL_CHECK_BITS. Do not move this entry's position
+	 * without appropriate consideration.
+	 */
+	__ICE_STATE_NOMINAL_CHECK_BITS,
+	__ICE_ADMINQ_EVENT_PENDING,
+	__ICE_MAILBOXQ_EVENT_PENDING,
+	__ICE_MDD_EVENT_PENDING,
+	__ICE_VFLR_EVENT_PENDING,
+	__ICE_FLTR_OVERFLOW_PROMISC,
+	__ICE_VF_DIS,
+	__ICE_CFG_BUSY,
+	__ICE_SERVICE_SCHED,
+	__ICE_SERVICE_DIS,
+	__ICE_FD_FLUSH_REQ,
+	__ICE_OICR_INTR_DIS,		/* Global OICR interrupt disabled */
+	__ICE_MDD_VF_PRINT_PENDING,	/* set when MDD event handle */
+	__ICE_VF_RESETS_DISABLED,	/* disable resets during ice_remove */
+	__ICE_LINK_DEFAULT_OVERRIDE_PENDING,
+	__ICE_PHY_INIT_COMPLETE,
+	__ICE_STATE_NBITS		/* must be last */
+};
+
+enum ice_vsi_flags {
+	ICE_VSI_FLAG_UMAC_FLTR_CHANGED,
+	ICE_VSI_FLAG_MMAC_FLTR_CHANGED,
+	ICE_VSI_FLAG_VLAN_FLTR_CHANGED,
+	ICE_VSI_FLAG_PROMISC_CHANGED,
+	ICE_VSI_FLAG_NBITS		/* must be last */
+};
+
+/* struct that defines a VSI, associated with a dev */
+struct ice_vsi {
+	struct net_device *netdev;
+	struct ice_sw *vsw;		 /* switch this VSI is on */
+	struct ice_pf *back;		 /* back pointer to PF */
+	struct ice_port_info *port_info; /* back pointer to port_info */
+	struct ice_ring **rx_rings;	 /* Rx ring array */
+	struct ice_ring **tx_rings;	 /* Tx ring array */
+	struct ice_q_vector **q_vectors; /* q_vector array */
+
+	irqreturn_t (*irq_handler)(int irq, void *data);
+
+	u64 tx_linearize;
+	DECLARE_BITMAP(state, __ICE_STATE_NBITS);
+	DECLARE_BITMAP(flags, ICE_VSI_FLAG_NBITS);
+	unsigned int current_netdev_flags;
+	u32 tx_restart;
+	u32 tx_busy;
+	u32 rx_buf_failed;
+	u32 rx_page_failed;
+	u32 rx_gro_dropped;
+	u16 num_q_vectors;
+	u16 base_vector;		/* IRQ base for OS reserved vectors */
+	enum ice_vsi_type type;
+	u16 vsi_num;			/* HW (absolute) index of this VSI */
+	u16 idx;			/* software index in pf->vsi[] */
+
+	s16 vf_id;			/* VF ID for SR-IOV VSIs */
+
+	u16 ethtype;			/* Ethernet protocol for pause frame */
+	u16 num_gfltr;
+	u16 num_bfltr;
+
+	/* RSS config */
+	u16 rss_table_size;	/* HW RSS table size */
+	u16 rss_size;		/* Allocated RSS queues */
+	u8 *rss_hkey_user;	/* User configured hash keys */
+	u8 *rss_lut_user;	/* User configured lookup table entries */
+	u8 rss_lut_type;	/* used to configure Get/Set RSS LUT AQ call */
+
+	/* aRFS members only allocated for the PF VSI */
+#define ICE_MAX_ARFS_LIST	1024
+#define ICE_ARFS_LST_MASK	(ICE_MAX_ARFS_LIST - 1)
+	struct hlist_head *arfs_fltr_list;
+	struct ice_arfs_active_fltr_cntrs *arfs_fltr_cntrs;
+	spinlock_t arfs_lock;	/* protects aRFS hash table and filter state */
+	atomic_t *arfs_last_fltr_id;
+
+	/* devlink port data */
+	struct devlink_port devlink_port;
+	bool devlink_port_registered;
+
+	u16 max_frame;
+	u16 rx_buf_len;
+
+	struct ice_aqc_vsi_props info;	 /* VSI properties */
+
+	/* VSI stats */
+	struct rtnl_link_stats64 net_stats;
+	struct ice_eth_stats eth_stats;
+	struct ice_eth_stats eth_stats_prev;
+
+	struct list_head tmp_sync_list;		/* MAC filters to be synced */
+	struct list_head tmp_unsync_list;	/* MAC filters to be unsynced */
+
+	u8 irqs_ready:1;
+	u8 current_isup:1;		 /* Sync 'link up' logging */
+	u8 stat_offsets_loaded:1;
+	u8 vlan_ena:1;
+	u16 num_vlan;
+
+	/* queue information */
+	u8 tx_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+	u8 rx_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+	u16 *txq_map;			 /* index in pf->avail_txqs */
+	u16 *rxq_map;			 /* index in pf->avail_rxqs */
+	u16 alloc_txq;			 /* Allocated Tx queues */
+	u16 num_txq;			 /* Used Tx queues */
+	u16 alloc_rxq;			 /* Allocated Rx queues */
+	u16 num_rxq;			 /* Used Rx queues */
+	u16 req_txq;			 /* User requested Tx queues */
+	u16 req_rxq;			 /* User requested Rx queues */
+	u16 num_rx_desc;
+	u16 num_tx_desc;
+	struct ice_tc_cfg tc_cfg;
+	struct bpf_prog *xdp_prog;
+	struct ice_ring **xdp_rings;	 /* XDP ring array */
+	u16 num_xdp_txq;		 /* Used XDP queues */
+	u8 xdp_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+	struct xsk_buff_pool **xsk_pools;
+	u16 num_xsk_pools_used;
+	u16 num_xsk_pools;
+} ____cacheline_internodealigned_in_smp;
+
+/* struct that defines an interrupt vector */
+struct ice_q_vector {
+	struct ice_vsi *vsi;
+
+	u16 v_idx;			/* index in the vsi->q_vector array. */
+	u16 reg_idx;
+	u8 num_ring_rx;			/* total number of Rx rings in vector */
+	u8 num_ring_tx;			/* total number of Tx rings in vector */
+	u8 itr_countdown;		/* when 0 should adjust adaptive ITR */
+	/* in usecs, need to use ice_intrl_to_usecs_reg() before writing this
+	 * value to the device
+	 */
+	u8 intrl;
+
+	struct napi_struct napi;
+
+	struct ice_ring_container rx;
+	struct ice_ring_container tx;
+
+	cpumask_t affinity_mask;
+	struct irq_affinity_notify affinity_notify;
+
+	char name[ICE_INT_NAME_STR_LEN];
+} ____cacheline_internodealigned_in_smp;
+
+enum ice_pf_flags {
+	ICE_FLAG_FLTR_SYNC,
+	ICE_FLAG_RSS_ENA,
+	ICE_FLAG_SRIOV_ENA,
+	ICE_FLAG_SRIOV_CAPABLE,
+	ICE_FLAG_DCB_CAPABLE,
+	ICE_FLAG_DCB_ENA,
+	ICE_FLAG_FD_ENA,
+	ICE_FLAG_ADV_FEATURES,
+	ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA,
+	ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
+	ICE_FLAG_NO_MEDIA,
+	ICE_FLAG_FW_LLDP_AGENT,
+	ICE_FLAG_ETHTOOL_CTXT,		/* set when ethtool holds RTNL lock */
+	ICE_FLAG_LEGACY_RX,
+	ICE_FLAG_VF_TRUE_PROMISC_ENA,
+	ICE_FLAG_MDD_AUTO_RESET_VF,
+	ICE_FLAG_LINK_LENIENT_MODE_ENA,
+	ICE_PF_FLAGS_NBITS		/* must be last */
+};
+
+struct ice_pf {
+	struct pci_dev *pdev;
+
+	struct devlink_region *nvm_region;
+	struct devlink_region *devcaps_region;
+
+	/* OS reserved IRQ details */
+	struct msix_entry *msix_entries;
+	struct ice_res_tracker *irq_tracker;
+	/* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the
+	 * number of MSIX vectors needed for all SR-IOV VFs from the number of
+	 * MSIX vectors allowed on this PF.
+	 */
+	u16 sriov_base_vector;
+
+	u16 ctrl_vsi_idx;		/* control VSI index in pf->vsi array */
+
+	struct ice_vsi **vsi;		/* VSIs created by the driver */
+	struct ice_sw *first_sw;	/* first switch created by firmware */
+	/* Virtchnl/SR-IOV config info */
+	struct ice_vf *vf;
+	u16 num_alloc_vfs;		/* actual number of VFs allocated */
+	u16 num_vfs_supported;		/* num VFs supported for this PF */
+	u16 num_qps_per_vf;
+	u16 num_msix_per_vf;
+	/* used to ratelimit the MDD event logging */
+	unsigned long last_printed_mdd_jiffies;
+	DECLARE_BITMAP(state, __ICE_STATE_NBITS);
+	DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS);
+	unsigned long *avail_txqs;	/* bitmap to track PF Tx queue usage */
+	unsigned long *avail_rxqs;	/* bitmap to track PF Rx queue usage */
+	unsigned long serv_tmr_period;
+	unsigned long serv_tmr_prev;
+	struct timer_list serv_tmr;
+	struct work_struct serv_task;
+	struct mutex avail_q_mutex;	/* protects access to avail_[rx|tx]qs */
+	struct mutex sw_mutex;		/* lock for protecting VSI alloc flow */
+	struct mutex tc_mutex;		/* lock to protect TC changes */
+	u32 msg_enable;
+
+	/* spinlock to protect the AdminQ wait list */
+	spinlock_t aq_wait_lock;
+	struct hlist_head aq_wait_list;
+	wait_queue_head_t aq_wait_queue;
+
+	u32 hw_csum_rx_error;
+	u16 oicr_idx;		/* Other interrupt cause MSIX vector index */
+	u16 num_avail_sw_msix;	/* remaining MSIX SW vectors left unclaimed */
+	u16 max_pf_txqs;	/* Total Tx queues PF wide */
+	u16 max_pf_rxqs;	/* Total Rx queues PF wide */
+	u16 num_lan_msix;	/* Total MSIX vectors for base driver */
+	u16 num_lan_tx;		/* num LAN Tx queues setup */
+	u16 num_lan_rx;		/* num LAN Rx queues setup */
+	u16 next_vsi;		/* Next free slot in pf->vsi[] - 0-based! */
+	u16 num_alloc_vsi;
+	u16 corer_count;	/* Core reset count */
+	u16 globr_count;	/* Global reset count */
+	u16 empr_count;		/* EMP reset count */
+	u16 pfr_count;		/* PF reset count */
+
+	u8 wol_ena : 1;		/* software state of WoL */
+	u32 wakeup_reason;	/* last wakeup reason */
+	struct ice_hw_port_stats stats;
+	struct ice_hw_port_stats stats_prev;
+	struct ice_hw hw;
+	u8 stat_prev_loaded:1; /* has previous stats been loaded */
+	u16 dcbx_cap;
+	u32 tx_timeout_count;
+	unsigned long tx_timeout_last_recovery;
+	u32 tx_timeout_recovery_level;
+	char int_name[ICE_INT_NAME_STR_LEN];
+	u32 sw_int_count;
+
+	__le64 nvm_phy_type_lo; /* NVM PHY type low */
+	__le64 nvm_phy_type_hi; /* NVM PHY type high */
+	struct ice_link_default_override_tlv link_dflt_override;
+};
+
+struct ice_netdev_priv {
+	struct ice_vsi *vsi;
+};
+
+/**
+ * ice_irq_dynamic_ena - Enable default interrupt generation settings
+ * @hw: pointer to HW struct
+ * @vsi: pointer to VSI struct, can be NULL
+ * @q_vector: pointer to q_vector, can be NULL
+ */
+static inline void
+ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi,
+		    struct ice_q_vector *q_vector)
+{
+	u32 vector = (vsi && q_vector) ? q_vector->reg_idx :
+				((struct ice_pf *)hw->back)->oicr_idx;
+	int itr = ICE_ITR_NONE;
+	u32 val;
+
+	/* clear the PBA here, as this function is meant to clean out all
+	 * previous interrupts and enable the interrupt
+	 */
+	val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
+	      (itr << GLINT_DYN_CTL_ITR_INDX_S);
+	if (vsi)
+		if (test_bit(__ICE_DOWN, vsi->state))
+			return;
+	wr32(hw, GLINT_DYN_CTL(vector), val);
+}
+
+/**
+ * ice_netdev_to_pf - Retrieve the PF struct associated with a netdev
+ * @netdev: pointer to the netdev struct
+ */
+static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+
+	return np->vsi->back;
+}
+
+static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi)
+{
+	return !!READ_ONCE(vsi->xdp_prog);
+}
+
+static inline void ice_set_ring_xdp(struct ice_ring *ring)
+{
+	ring->flags |= ICE_TX_FLAGS_RING_XDP;
+}
+
+/**
+ * ice_xsk_pool - get XSK buffer pool bound to a ring
+ * @ring: ring to use
+ *
+ * Returns a pointer to xdp_umem structure if there is a buffer pool present,
+ * NULL otherwise.
+ */
+static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_ring *ring)
+{
+	struct xsk_buff_pool **pools = ring->vsi->xsk_pools;
+	u16 qid = ring->q_index;
+
+	if (ice_ring_is_xdp(ring))
+		qid -= ring->vsi->num_xdp_txq;
+
+	if (qid >= ring->vsi->num_xsk_pools || !pools || !pools[qid] ||
+	    !ice_is_xdp_ena_vsi(ring->vsi))
+		return NULL;
+
+	return pools[qid];
+}
+
+/**
+ * ice_get_main_vsi - Get the PF VSI
+ * @pf: PF instance
+ *
+ * returns pf->vsi[0], which by definition is the PF VSI
+ */
+static inline struct ice_vsi *ice_get_main_vsi(struct ice_pf *pf)
+{
+	if (pf->vsi)
+		return pf->vsi[0];
+
+	return NULL;
+}
+
+/**
+ * ice_get_ctrl_vsi - Get the control VSI
+ * @pf: PF instance
+ */
+static inline struct ice_vsi *ice_get_ctrl_vsi(struct ice_pf *pf)
+{
+	/* if pf->ctrl_vsi_idx is ICE_NO_VSI, control VSI was not set up */
+	if (!pf->vsi || pf->ctrl_vsi_idx == ICE_NO_VSI)
+		return NULL;
+
+	return pf->vsi[pf->ctrl_vsi_idx];
+}
+
+#define ICE_FD_STAT_CTR_BLOCK_COUNT	256
+#define ICE_FD_STAT_PF_IDX(base_idx) \
+			((base_idx) * ICE_FD_STAT_CTR_BLOCK_COUNT)
+#define ICE_FD_SB_STAT_IDX(base_idx) ICE_FD_STAT_PF_IDX(base_idx)
+
+int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
+int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
+int ice_vsi_open_ctrl(struct ice_vsi *vsi);
+void ice_set_ethtool_ops(struct net_device *netdev);
+void ice_set_ethtool_safe_mode_ops(struct net_device *netdev);
+u16 ice_get_avail_txq_count(struct ice_pf *pf);
+u16 ice_get_avail_rxq_count(struct ice_pf *pf);
+int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx);
+void ice_update_vsi_stats(struct ice_vsi *vsi);
+void ice_update_pf_stats(struct ice_pf *pf);
+int ice_up(struct ice_vsi *vsi);
+int ice_down(struct ice_vsi *vsi);
+int ice_vsi_cfg(struct ice_vsi *vsi);
+struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog);
+int ice_destroy_xdp_rings(struct ice_vsi *vsi);
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+	     u32 flags);
+int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
+int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
+void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
+int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
+void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
+const char *ice_stat_str(enum ice_status stat_err);
+const char *ice_aq_str(enum ice_aq_err aq_err);
+bool ice_is_wol_supported(struct ice_hw *hw);
+int
+ice_fdir_write_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool add,
+		    bool is_tun);
+void ice_vsi_manage_fdir(struct ice_vsi *vsi, bool ena);
+int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd);
+int ice_del_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd);
+int ice_get_ethtool_fdir_entry(struct ice_hw *hw, struct ethtool_rxnfc *cmd);
+int
+ice_get_fdir_fltr_ids(struct ice_hw *hw, struct ethtool_rxnfc *cmd,
+		      u32 *rule_locs);
+void ice_fdir_release_flows(struct ice_hw *hw);
+void ice_fdir_replay_flows(struct ice_hw *hw);
+void ice_fdir_replay_fltrs(struct ice_pf *pf);
+int ice_fdir_create_dflt_rules(struct ice_pf *pf);
+int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
+			  struct ice_rq_event_info *event);
+int ice_open(struct net_device *netdev);
+int ice_open_internal(struct net_device *netdev);
+int ice_stop(struct net_device *netdev);
+void ice_service_task_schedule(struct ice_pf *pf);
+
+#endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
new file mode 100644
index 000000000..b6dd8f81d
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -0,0 +1,2045 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_ADMINQ_CMD_H_
+#define _ICE_ADMINQ_CMD_H_
+
+/* This header file defines the Admin Queue commands, error codes and
+ * descriptor format. It is shared between Firmware and Software.
+ */
+
+#define ICE_MAX_VSI			768
+#define ICE_AQC_TOPO_MAX_LEVEL_NUM	0x9
+#define ICE_AQ_SET_MAC_FRAME_SIZE_MAX	9728
+
+struct ice_aqc_generic {
+	__le32 param0;
+	__le32 param1;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Get version (direct 0x0001) */
+struct ice_aqc_get_ver {
+	__le32 rom_ver;
+	__le32 fw_build;
+	u8 fw_branch;
+	u8 fw_major;
+	u8 fw_minor;
+	u8 fw_patch;
+	u8 api_branch;
+	u8 api_major;
+	u8 api_minor;
+	u8 api_patch;
+};
+
+/* Send driver version (indirect 0x0002) */
+struct ice_aqc_driver_ver {
+	u8 major_ver;
+	u8 minor_ver;
+	u8 build_ver;
+	u8 subbuild_ver;
+	u8 reserved[4];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Queue Shutdown (direct 0x0003) */
+struct ice_aqc_q_shutdown {
+	u8 driver_unloading;
+#define ICE_AQC_DRIVER_UNLOADING	BIT(0)
+	u8 reserved[15];
+};
+
+/* Request resource ownership (direct 0x0008)
+ * Release resource ownership (direct 0x0009)
+ */
+struct ice_aqc_req_res {
+	__le16 res_id;
+#define ICE_AQC_RES_ID_NVM		1
+#define ICE_AQC_RES_ID_SDP		2
+#define ICE_AQC_RES_ID_CHNG_LOCK	3
+#define ICE_AQC_RES_ID_GLBL_LOCK	4
+	__le16 access_type;
+#define ICE_AQC_RES_ACCESS_READ		1
+#define ICE_AQC_RES_ACCESS_WRITE	2
+
+	/* Upon successful completion, FW writes this value and driver is
+	 * expected to release resource before timeout. This value is provided
+	 * in milliseconds.
+	 */
+	__le32 timeout;
+#define ICE_AQ_RES_NVM_READ_DFLT_TIMEOUT_MS	3000
+#define ICE_AQ_RES_NVM_WRITE_DFLT_TIMEOUT_MS	180000
+#define ICE_AQ_RES_CHNG_LOCK_DFLT_TIMEOUT_MS	1000
+#define ICE_AQ_RES_GLBL_LOCK_DFLT_TIMEOUT_MS	3000
+	/* For SDP: pin ID of the SDP */
+	__le32 res_number;
+	/* Status is only used for ICE_AQC_RES_ID_GLBL_LOCK */
+	__le16 status;
+#define ICE_AQ_RES_GLBL_SUCCESS		0
+#define ICE_AQ_RES_GLBL_IN_PROG		1
+#define ICE_AQ_RES_GLBL_DONE		2
+	u8 reserved[2];
+};
+
+/* Get function capabilities (indirect 0x000A)
+ * Get device capabilities (indirect 0x000B)
+ */
+struct ice_aqc_list_caps {
+	u8 cmd_flags;
+	u8 pf_index;
+	u8 reserved[2];
+	__le32 count;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Device/Function buffer entry, repeated per reported capability */
+struct ice_aqc_list_caps_elem {
+	__le16 cap;
+#define ICE_AQC_CAPS_VALID_FUNCTIONS			0x0005
+#define ICE_AQC_CAPS_SRIOV				0x0012
+#define ICE_AQC_CAPS_VF					0x0013
+#define ICE_AQC_CAPS_VSI				0x0017
+#define ICE_AQC_CAPS_DCB				0x0018
+#define ICE_AQC_CAPS_RSS				0x0040
+#define ICE_AQC_CAPS_RXQS				0x0041
+#define ICE_AQC_CAPS_TXQS				0x0042
+#define ICE_AQC_CAPS_MSIX				0x0043
+#define ICE_AQC_CAPS_FD					0x0045
+#define ICE_AQC_CAPS_MAX_MTU				0x0047
+#define ICE_AQC_CAPS_NVM_VER				0x0048
+#define ICE_AQC_CAPS_PENDING_NVM_VER			0x0049
+#define ICE_AQC_CAPS_OROM_VER				0x004A
+#define ICE_AQC_CAPS_PENDING_OROM_VER			0x004B
+#define ICE_AQC_CAPS_NET_VER				0x004C
+#define ICE_AQC_CAPS_PENDING_NET_VER			0x004D
+#define ICE_AQC_CAPS_NVM_MGMT				0x0080
+
+	u8 major_ver;
+	u8 minor_ver;
+	/* Number of resources described by this capability */
+	__le32 number;
+	/* Only meaningful for some types of resources */
+	__le32 logical_id;
+	/* Only meaningful for some types of resources */
+	__le32 phys_id;
+	__le64 rsvd1;
+	__le64 rsvd2;
+};
+
+/* Manage MAC address, read command - indirect (0x0107)
+ * This struct is also used for the response
+ */
+struct ice_aqc_manage_mac_read {
+	__le16 flags; /* Zeroed by device driver */
+#define ICE_AQC_MAN_MAC_LAN_ADDR_VALID		BIT(4)
+#define ICE_AQC_MAN_MAC_SAN_ADDR_VALID		BIT(5)
+#define ICE_AQC_MAN_MAC_PORT_ADDR_VALID		BIT(6)
+#define ICE_AQC_MAN_MAC_WOL_ADDR_VALID		BIT(7)
+#define ICE_AQC_MAN_MAC_READ_S			4
+#define ICE_AQC_MAN_MAC_READ_M			(0xF << ICE_AQC_MAN_MAC_READ_S)
+	u8 rsvd[2];
+	u8 num_addr; /* Used in response */
+	u8 rsvd1[3];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Response buffer format for manage MAC read command */
+struct ice_aqc_manage_mac_read_resp {
+	u8 lport_num;
+	u8 addr_type;
+#define ICE_AQC_MAN_MAC_ADDR_TYPE_LAN		0
+#define ICE_AQC_MAN_MAC_ADDR_TYPE_WOL		1
+	u8 mac_addr[ETH_ALEN];
+};
+
+/* Manage MAC address, write command - direct (0x0108) */
+struct ice_aqc_manage_mac_write {
+	u8 rsvd;
+	u8 flags;
+#define ICE_AQC_MAN_MAC_WR_MC_MAG_EN		BIT(0)
+#define ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP	BIT(1)
+#define ICE_AQC_MAN_MAC_WR_S		6
+#define ICE_AQC_MAN_MAC_WR_M		ICE_M(3, ICE_AQC_MAN_MAC_WR_S)
+#define ICE_AQC_MAN_MAC_UPDATE_LAA	0
+#define ICE_AQC_MAN_MAC_UPDATE_LAA_WOL	BIT(ICE_AQC_MAN_MAC_WR_S)
+	/* byte stream in network order */
+	u8 mac_addr[ETH_ALEN];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Clear PXE Command and response (direct 0x0110) */
+struct ice_aqc_clear_pxe {
+	u8 rx_cnt;
+#define ICE_AQC_CLEAR_PXE_RX_CNT		0x2
+	u8 reserved[15];
+};
+
+/* Get switch configuration (0x0200) */
+struct ice_aqc_get_sw_cfg {
+	/* Reserved for command and copy of request flags for response */
+	__le16 flags;
+	/* First desc in case of command and next_elem in case of response
+	 * In case of response, if it is not zero, means all the configuration
+	 * was not returned and new command shall be sent with this value in
+	 * the 'first desc' field
+	 */
+	__le16 element;
+	/* Reserved for command, only used for response */
+	__le16 num_elems;
+	__le16 rsvd;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Each entry in the response buffer is of the following type: */
+struct ice_aqc_get_sw_cfg_resp_elem {
+	/* VSI/Port Number */
+	__le16 vsi_port_num;
+#define ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_S	0
+#define ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_M	\
+			(0x3FF << ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_S)
+#define ICE_AQC_GET_SW_CONF_RESP_TYPE_S	14
+#define ICE_AQC_GET_SW_CONF_RESP_TYPE_M	(0x3 << ICE_AQC_GET_SW_CONF_RESP_TYPE_S)
+#define ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT	0
+#define ICE_AQC_GET_SW_CONF_RESP_VIRT_PORT	1
+#define ICE_AQC_GET_SW_CONF_RESP_VSI		2
+
+	/* SWID VSI/Port belongs to */
+	__le16 swid;
+
+	/* Bit 14..0 : PF/VF number VSI belongs to
+	 * Bit 15 : VF indication bit
+	 */
+	__le16 pf_vf_num;
+#define ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_S	0
+#define ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_M	\
+				(0x7FFF << ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_S)
+#define ICE_AQC_GET_SW_CONF_RESP_IS_VF		BIT(15)
+};
+
+/* These resource type defines are used for all switch resource
+ * commands where a resource type is required, such as:
+ * Get Resource Allocation command (indirect 0x0204)
+ * Allocate Resources command (indirect 0x0208)
+ * Free Resources command (indirect 0x0209)
+ * Get Allocated Resource Descriptors Command (indirect 0x020A)
+ */
+#define ICE_AQC_RES_TYPE_VSI_LIST_REP			0x03
+#define ICE_AQC_RES_TYPE_VSI_LIST_PRUNE			0x04
+#define ICE_AQC_RES_TYPE_FDIR_COUNTER_BLOCK		0x21
+#define ICE_AQC_RES_TYPE_FDIR_GUARANTEED_ENTRIES	0x22
+#define ICE_AQC_RES_TYPE_FDIR_SHARED_ENTRIES		0x23
+#define ICE_AQC_RES_TYPE_FD_PROF_BLDR_PROFID		0x58
+#define ICE_AQC_RES_TYPE_FD_PROF_BLDR_TCAM		0x59
+#define ICE_AQC_RES_TYPE_HASH_PROF_BLDR_PROFID		0x60
+#define ICE_AQC_RES_TYPE_HASH_PROF_BLDR_TCAM		0x61
+
+#define ICE_AQC_RES_TYPE_FLAG_SCAN_BOTTOM		BIT(12)
+#define ICE_AQC_RES_TYPE_FLAG_IGNORE_INDEX		BIT(13)
+
+#define ICE_AQC_RES_TYPE_FLAG_DEDICATED			0x00
+
+#define ICE_AQC_RES_TYPE_S	0
+#define ICE_AQC_RES_TYPE_M	(0x07F << ICE_AQC_RES_TYPE_S)
+
+/* Allocate Resources command (indirect 0x0208)
+ * Free Resources command (indirect 0x0209)
+ */
+struct ice_aqc_alloc_free_res_cmd {
+	__le16 num_entries; /* Number of Resource entries */
+	u8 reserved[6];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Resource descriptor */
+struct ice_aqc_res_elem {
+	union {
+		__le16 sw_resp;
+		__le16 flu_resp;
+	} e;
+};
+
+/* Buffer for Allocate/Free Resources commands */
+struct ice_aqc_alloc_free_res_elem {
+	__le16 res_type; /* Types defined above cmd 0x0204 */
+#define ICE_AQC_RES_TYPE_SHARED_S	7
+#define ICE_AQC_RES_TYPE_SHARED_M	(0x1 << ICE_AQC_RES_TYPE_SHARED_S)
+#define ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_S	8
+#define ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_M	\
+				(0xF << ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_S)
+	__le16 num_elems;
+	struct ice_aqc_res_elem elem[];
+};
+
+/* Add VSI (indirect 0x0210)
+ * Update VSI (indirect 0x0211)
+ * Get VSI (indirect 0x0212)
+ * Free VSI (indirect 0x0213)
+ */
+struct ice_aqc_add_get_update_free_vsi {
+	__le16 vsi_num;
+#define ICE_AQ_VSI_NUM_S	0
+#define ICE_AQ_VSI_NUM_M	(0x03FF << ICE_AQ_VSI_NUM_S)
+#define ICE_AQ_VSI_IS_VALID	BIT(15)
+	__le16 cmd_flags;
+#define ICE_AQ_VSI_KEEP_ALLOC	0x1
+	u8 vf_id;
+	u8 reserved;
+	__le16 vsi_flags;
+#define ICE_AQ_VSI_TYPE_S	0
+#define ICE_AQ_VSI_TYPE_M	(0x3 << ICE_AQ_VSI_TYPE_S)
+#define ICE_AQ_VSI_TYPE_VF	0x0
+#define ICE_AQ_VSI_TYPE_VMDQ2	0x1
+#define ICE_AQ_VSI_TYPE_PF	0x2
+#define ICE_AQ_VSI_TYPE_EMP_MNG	0x3
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Response descriptor for:
+ * Add VSI (indirect 0x0210)
+ * Update VSI (indirect 0x0211)
+ * Free VSI (indirect 0x0213)
+ */
+struct ice_aqc_add_update_free_vsi_resp {
+	__le16 vsi_num;
+	__le16 ext_status;
+	__le16 vsi_used;
+	__le16 vsi_free;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_vsi_props {
+	__le16 valid_sections;
+#define ICE_AQ_VSI_PROP_SW_VALID		BIT(0)
+#define ICE_AQ_VSI_PROP_SECURITY_VALID		BIT(1)
+#define ICE_AQ_VSI_PROP_VLAN_VALID		BIT(2)
+#define ICE_AQ_VSI_PROP_OUTER_TAG_VALID		BIT(3)
+#define ICE_AQ_VSI_PROP_INGRESS_UP_VALID	BIT(4)
+#define ICE_AQ_VSI_PROP_EGRESS_UP_VALID		BIT(5)
+#define ICE_AQ_VSI_PROP_RXQ_MAP_VALID		BIT(6)
+#define ICE_AQ_VSI_PROP_Q_OPT_VALID		BIT(7)
+#define ICE_AQ_VSI_PROP_OUTER_UP_VALID		BIT(8)
+#define ICE_AQ_VSI_PROP_FLOW_DIR_VALID		BIT(11)
+#define ICE_AQ_VSI_PROP_PASID_VALID		BIT(12)
+	/* switch section */
+	u8 sw_id;
+	u8 sw_flags;
+#define ICE_AQ_VSI_SW_FLAG_ALLOW_LB		BIT(5)
+#define ICE_AQ_VSI_SW_FLAG_LOCAL_LB		BIT(6)
+#define ICE_AQ_VSI_SW_FLAG_SRC_PRUNE		BIT(7)
+	u8 sw_flags2;
+#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S	0
+#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_M	\
+				(0xF << ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S)
+#define ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA	BIT(0)
+#define ICE_AQ_VSI_SW_FLAG_LAN_ENA		BIT(4)
+	u8 veb_stat_id;
+#define ICE_AQ_VSI_SW_VEB_STAT_ID_S		0
+#define ICE_AQ_VSI_SW_VEB_STAT_ID_M	(0x1F << ICE_AQ_VSI_SW_VEB_STAT_ID_S)
+#define ICE_AQ_VSI_SW_VEB_STAT_ID_VALID		BIT(5)
+	/* security section */
+	u8 sec_flags;
+#define ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD	BIT(0)
+#define ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF	BIT(2)
+#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S	4
+#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_M	(0xF << ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S)
+#define ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA	BIT(0)
+	u8 sec_reserved;
+	/* VLAN section */
+	__le16 pvid; /* VLANS include priority bits */
+	u8 pvlan_reserved[2];
+	u8 vlan_flags;
+#define ICE_AQ_VSI_VLAN_MODE_S	0
+#define ICE_AQ_VSI_VLAN_MODE_M	(0x3 << ICE_AQ_VSI_VLAN_MODE_S)
+#define ICE_AQ_VSI_VLAN_MODE_UNTAGGED	0x1
+#define ICE_AQ_VSI_VLAN_MODE_TAGGED	0x2
+#define ICE_AQ_VSI_VLAN_MODE_ALL	0x3
+#define ICE_AQ_VSI_PVLAN_INSERT_PVID	BIT(2)
+#define ICE_AQ_VSI_VLAN_EMOD_S		3
+#define ICE_AQ_VSI_VLAN_EMOD_M		(0x3 << ICE_AQ_VSI_VLAN_EMOD_S)
+#define ICE_AQ_VSI_VLAN_EMOD_STR_BOTH	(0x0 << ICE_AQ_VSI_VLAN_EMOD_S)
+#define ICE_AQ_VSI_VLAN_EMOD_STR_UP	(0x1 << ICE_AQ_VSI_VLAN_EMOD_S)
+#define ICE_AQ_VSI_VLAN_EMOD_STR	(0x2 << ICE_AQ_VSI_VLAN_EMOD_S)
+#define ICE_AQ_VSI_VLAN_EMOD_NOTHING	(0x3 << ICE_AQ_VSI_VLAN_EMOD_S)
+	u8 pvlan_reserved2[3];
+	/* ingress egress up sections */
+	__le32 ingress_table; /* bitmap, 3 bits per up */
+#define ICE_AQ_VSI_UP_TABLE_UP0_S	0
+#define ICE_AQ_VSI_UP_TABLE_UP0_M	(0x7 << ICE_AQ_VSI_UP_TABLE_UP0_S)
+#define ICE_AQ_VSI_UP_TABLE_UP1_S	3
+#define ICE_AQ_VSI_UP_TABLE_UP1_M	(0x7 << ICE_AQ_VSI_UP_TABLE_UP1_S)
+#define ICE_AQ_VSI_UP_TABLE_UP2_S	6
+#define ICE_AQ_VSI_UP_TABLE_UP2_M	(0x7 << ICE_AQ_VSI_UP_TABLE_UP2_S)
+#define ICE_AQ_VSI_UP_TABLE_UP3_S	9
+#define ICE_AQ_VSI_UP_TABLE_UP3_M	(0x7 << ICE_AQ_VSI_UP_TABLE_UP3_S)
+#define ICE_AQ_VSI_UP_TABLE_UP4_S	12
+#define ICE_AQ_VSI_UP_TABLE_UP4_M	(0x7 << ICE_AQ_VSI_UP_TABLE_UP4_S)
+#define ICE_AQ_VSI_UP_TABLE_UP5_S	15
+#define ICE_AQ_VSI_UP_TABLE_UP5_M	(0x7 << ICE_AQ_VSI_UP_TABLE_UP5_S)
+#define ICE_AQ_VSI_UP_TABLE_UP6_S	18
+#define ICE_AQ_VSI_UP_TABLE_UP6_M	(0x7 << ICE_AQ_VSI_UP_TABLE_UP6_S)
+#define ICE_AQ_VSI_UP_TABLE_UP7_S	21
+#define ICE_AQ_VSI_UP_TABLE_UP7_M	(0x7 << ICE_AQ_VSI_UP_TABLE_UP7_S)
+	__le32 egress_table;   /* same defines as for ingress table */
+	/* outer tags section */
+	__le16 outer_tag;
+	u8 outer_tag_flags;
+#define ICE_AQ_VSI_OUTER_TAG_MODE_S	0
+#define ICE_AQ_VSI_OUTER_TAG_MODE_M	(0x3 << ICE_AQ_VSI_OUTER_TAG_MODE_S)
+#define ICE_AQ_VSI_OUTER_TAG_NOTHING	0x0
+#define ICE_AQ_VSI_OUTER_TAG_REMOVE	0x1
+#define ICE_AQ_VSI_OUTER_TAG_COPY	0x2
+#define ICE_AQ_VSI_OUTER_TAG_TYPE_S	2
+#define ICE_AQ_VSI_OUTER_TAG_TYPE_M	(0x3 << ICE_AQ_VSI_OUTER_TAG_TYPE_S)
+#define ICE_AQ_VSI_OUTER_TAG_NONE	0x0
+#define ICE_AQ_VSI_OUTER_TAG_STAG	0x1
+#define ICE_AQ_VSI_OUTER_TAG_VLAN_8100	0x2
+#define ICE_AQ_VSI_OUTER_TAG_VLAN_9100	0x3
+#define ICE_AQ_VSI_OUTER_TAG_INSERT	BIT(4)
+#define ICE_AQ_VSI_OUTER_TAG_ACCEPT_HOST BIT(6)
+	u8 outer_tag_reserved;
+	/* queue mapping section */
+	__le16 mapping_flags;
+#define ICE_AQ_VSI_Q_MAP_CONTIG	0x0
+#define ICE_AQ_VSI_Q_MAP_NONCONTIG	BIT(0)
+	__le16 q_mapping[16];
+#define ICE_AQ_VSI_Q_S		0
+#define ICE_AQ_VSI_Q_M		(0x7FF << ICE_AQ_VSI_Q_S)
+	__le16 tc_mapping[8];
+#define ICE_AQ_VSI_TC_Q_OFFSET_S	0
+#define ICE_AQ_VSI_TC_Q_OFFSET_M	(0x7FF << ICE_AQ_VSI_TC_Q_OFFSET_S)
+#define ICE_AQ_VSI_TC_Q_NUM_S		11
+#define ICE_AQ_VSI_TC_Q_NUM_M		(0xF << ICE_AQ_VSI_TC_Q_NUM_S)
+	/* queueing option section */
+	u8 q_opt_rss;
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_S	0
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_M	(0x3 << ICE_AQ_VSI_Q_OPT_RSS_LUT_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI	0x0
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_PF	0x2
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_GBL	0x3
+#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S	2
+#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M	(0xF << ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_HASH_S	6
+#define ICE_AQ_VSI_Q_OPT_RSS_HASH_M	(0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_TPLZ	(0x0 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_SYM_TPLZ	(0x1 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_XOR	(0x2 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_JHASH	(0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+	u8 q_opt_tc;
+#define ICE_AQ_VSI_Q_OPT_TC_OVR_S	0
+#define ICE_AQ_VSI_Q_OPT_TC_OVR_M	(0x1F << ICE_AQ_VSI_Q_OPT_TC_OVR_S)
+#define ICE_AQ_VSI_Q_OPT_PROF_TC_OVR	BIT(7)
+	u8 q_opt_flags;
+#define ICE_AQ_VSI_Q_OPT_PE_FLTR_EN	BIT(0)
+	u8 q_opt_reserved[3];
+	/* outer up section */
+	__le32 outer_up_table; /* same structure and defines as ingress tbl */
+	/* section 10 */
+	__le16 sect_10_reserved;
+	/* flow director section */
+	__le16 fd_options;
+#define ICE_AQ_VSI_FD_ENABLE		BIT(0)
+#define ICE_AQ_VSI_FD_TX_AUTO_ENABLE	BIT(1)
+#define ICE_AQ_VSI_FD_PROG_ENABLE	BIT(3)
+	__le16 max_fd_fltr_dedicated;
+	__le16 max_fd_fltr_shared;
+	__le16 fd_def_q;
+#define ICE_AQ_VSI_FD_DEF_Q_S		0
+#define ICE_AQ_VSI_FD_DEF_Q_M		(0x7FF << ICE_AQ_VSI_FD_DEF_Q_S)
+#define ICE_AQ_VSI_FD_DEF_GRP_S	12
+#define ICE_AQ_VSI_FD_DEF_GRP_M	(0x7 << ICE_AQ_VSI_FD_DEF_GRP_S)
+	__le16 fd_report_opt;
+#define ICE_AQ_VSI_FD_REPORT_Q_S	0
+#define ICE_AQ_VSI_FD_REPORT_Q_M	(0x7FF << ICE_AQ_VSI_FD_REPORT_Q_S)
+#define ICE_AQ_VSI_FD_DEF_PRIORITY_S	12
+#define ICE_AQ_VSI_FD_DEF_PRIORITY_M	(0x7 << ICE_AQ_VSI_FD_DEF_PRIORITY_S)
+#define ICE_AQ_VSI_FD_DEF_DROP		BIT(15)
+	/* PASID section */
+	__le32 pasid_id;
+#define ICE_AQ_VSI_PASID_ID_S		0
+#define ICE_AQ_VSI_PASID_ID_M		(0xFFFFF << ICE_AQ_VSI_PASID_ID_S)
+#define ICE_AQ_VSI_PASID_ID_VALID	BIT(31)
+	u8 reserved[24];
+};
+
+#define ICE_MAX_NUM_RECIPES 64
+
+/* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3)
+ */
+struct ice_aqc_sw_rules {
+	/* ops: add switch rules, referring the number of rules.
+	 * ops: update switch rules, referring the number of filters
+	 * ops: remove switch rules, referring the entry index.
+	 * ops: get switch rules, referring to the number of filters.
+	 */
+	__le16 num_rules_fltr_entry_index;
+	u8 reserved[6];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Add/Update/Get/Remove lookup Rx/Tx command/response entry
+ * This structures describes the lookup rules and associated actions. "index"
+ * is returned as part of a response to a successful Add command, and can be
+ * used to identify the rule for Update/Get/Remove commands.
+ */
+struct ice_sw_rule_lkup_rx_tx {
+	__le16 recipe_id;
+#define ICE_SW_RECIPE_LOGICAL_PORT_FWD		10
+	/* Source port for LOOKUP_RX and source VSI in case of LOOKUP_TX */
+	__le16 src;
+	__le32 act;
+
+	/* Bit 0:1 - Action type */
+#define ICE_SINGLE_ACT_TYPE_S	0x00
+#define ICE_SINGLE_ACT_TYPE_M	(0x3 << ICE_SINGLE_ACT_TYPE_S)
+
+	/* Bit 2 - Loop back enable
+	 * Bit 3 - LAN enable
+	 */
+#define ICE_SINGLE_ACT_LB_ENABLE	BIT(2)
+#define ICE_SINGLE_ACT_LAN_ENABLE	BIT(3)
+
+	/* Action type = 0 - Forward to VSI or VSI list */
+#define ICE_SINGLE_ACT_VSI_FORWARDING	0x0
+
+#define ICE_SINGLE_ACT_VSI_ID_S		4
+#define ICE_SINGLE_ACT_VSI_ID_M		(0x3FF << ICE_SINGLE_ACT_VSI_ID_S)
+#define ICE_SINGLE_ACT_VSI_LIST_ID_S	4
+#define ICE_SINGLE_ACT_VSI_LIST_ID_M	(0x3FF << ICE_SINGLE_ACT_VSI_LIST_ID_S)
+	/* This bit needs to be set if action is forward to VSI list */
+#define ICE_SINGLE_ACT_VSI_LIST		BIT(14)
+#define ICE_SINGLE_ACT_VALID_BIT	BIT(17)
+#define ICE_SINGLE_ACT_DROP		BIT(18)
+
+	/* Action type = 1 - Forward to Queue of Queue group */
+#define ICE_SINGLE_ACT_TO_Q		0x1
+#define ICE_SINGLE_ACT_Q_INDEX_S	4
+#define ICE_SINGLE_ACT_Q_INDEX_M	(0x7FF << ICE_SINGLE_ACT_Q_INDEX_S)
+#define ICE_SINGLE_ACT_Q_REGION_S	15
+#define ICE_SINGLE_ACT_Q_REGION_M	(0x7 << ICE_SINGLE_ACT_Q_REGION_S)
+#define ICE_SINGLE_ACT_Q_PRIORITY	BIT(18)
+
+	/* Action type = 2 - Prune */
+#define ICE_SINGLE_ACT_PRUNE		0x2
+#define ICE_SINGLE_ACT_EGRESS		BIT(15)
+#define ICE_SINGLE_ACT_INGRESS		BIT(16)
+#define ICE_SINGLE_ACT_PRUNET		BIT(17)
+	/* Bit 18 should be set to 0 for this action */
+
+	/* Action type = 2 - Pointer */
+#define ICE_SINGLE_ACT_PTR		0x2
+#define ICE_SINGLE_ACT_PTR_VAL_S	4
+#define ICE_SINGLE_ACT_PTR_VAL_M	(0x1FFF << ICE_SINGLE_ACT_PTR_VAL_S)
+	/* Bit 18 should be set to 1 */
+#define ICE_SINGLE_ACT_PTR_BIT		BIT(18)
+
+	/* Action type = 3 - Other actions. Last two bits
+	 * are other action identifier
+	 */
+#define ICE_SINGLE_ACT_OTHER_ACTS		0x3
+#define ICE_SINGLE_OTHER_ACT_IDENTIFIER_S	17
+#define ICE_SINGLE_OTHER_ACT_IDENTIFIER_M	\
+				(0x3 << ICE_SINGLE_OTHER_ACT_IDENTIFIER_S)
+
+	/* Bit 17:18 - Defines other actions */
+	/* Other action = 0 - Mirror VSI */
+#define ICE_SINGLE_OTHER_ACT_MIRROR		0
+#define ICE_SINGLE_ACT_MIRROR_VSI_ID_S	4
+#define ICE_SINGLE_ACT_MIRROR_VSI_ID_M	\
+				(0x3FF << ICE_SINGLE_ACT_MIRROR_VSI_ID_S)
+
+	/* Other action = 3 - Set Stat count */
+#define ICE_SINGLE_OTHER_ACT_STAT_COUNT		3
+#define ICE_SINGLE_ACT_STAT_COUNT_INDEX_S	4
+#define ICE_SINGLE_ACT_STAT_COUNT_INDEX_M	\
+				(0x7F << ICE_SINGLE_ACT_STAT_COUNT_INDEX_S)
+
+	__le16 index; /* The index of the rule in the lookup table */
+	/* Length and values of the header to be matched per recipe or
+	 * lookup-type
+	 */
+	__le16 hdr_len;
+	u8 hdr[];
+};
+
+/* Add/Update/Remove large action command/response entry
+ * "index" is returned as part of a response to a successful Add command, and
+ * can be used to identify the action for Update/Get/Remove commands.
+ */
+struct ice_sw_rule_lg_act {
+	__le16 index; /* Index in large action table */
+	__le16 size;
+	/* Max number of large actions */
+#define ICE_MAX_LG_ACT	4
+	/* Bit 0:1 - Action type */
+#define ICE_LG_ACT_TYPE_S	0
+#define ICE_LG_ACT_TYPE_M	(0x7 << ICE_LG_ACT_TYPE_S)
+
+	/* Action type = 0 - Forward to VSI or VSI list */
+#define ICE_LG_ACT_VSI_FORWARDING	0
+#define ICE_LG_ACT_VSI_ID_S		3
+#define ICE_LG_ACT_VSI_ID_M		(0x3FF << ICE_LG_ACT_VSI_ID_S)
+#define ICE_LG_ACT_VSI_LIST_ID_S	3
+#define ICE_LG_ACT_VSI_LIST_ID_M	(0x3FF << ICE_LG_ACT_VSI_LIST_ID_S)
+	/* This bit needs to be set if action is forward to VSI list */
+#define ICE_LG_ACT_VSI_LIST		BIT(13)
+
+#define ICE_LG_ACT_VALID_BIT		BIT(16)
+
+	/* Action type = 1 - Forward to Queue of Queue group */
+#define ICE_LG_ACT_TO_Q			0x1
+#define ICE_LG_ACT_Q_INDEX_S		3
+#define ICE_LG_ACT_Q_INDEX_M		(0x7FF << ICE_LG_ACT_Q_INDEX_S)
+#define ICE_LG_ACT_Q_REGION_S		14
+#define ICE_LG_ACT_Q_REGION_M		(0x7 << ICE_LG_ACT_Q_REGION_S)
+#define ICE_LG_ACT_Q_PRIORITY_SET	BIT(17)
+
+	/* Action type = 2 - Prune */
+#define ICE_LG_ACT_PRUNE		0x2
+#define ICE_LG_ACT_EGRESS		BIT(14)
+#define ICE_LG_ACT_INGRESS		BIT(15)
+#define ICE_LG_ACT_PRUNET		BIT(16)
+
+	/* Action type = 3 - Mirror VSI */
+#define ICE_LG_OTHER_ACT_MIRROR		0x3
+#define ICE_LG_ACT_MIRROR_VSI_ID_S	3
+#define ICE_LG_ACT_MIRROR_VSI_ID_M	(0x3FF << ICE_LG_ACT_MIRROR_VSI_ID_S)
+
+	/* Action type = 5 - Generic Value */
+#define ICE_LG_ACT_GENERIC		0x5
+#define ICE_LG_ACT_GENERIC_VALUE_S	3
+#define ICE_LG_ACT_GENERIC_VALUE_M	(0xFFFF << ICE_LG_ACT_GENERIC_VALUE_S)
+#define ICE_LG_ACT_GENERIC_OFFSET_S	19
+#define ICE_LG_ACT_GENERIC_OFFSET_M	(0x7 << ICE_LG_ACT_GENERIC_OFFSET_S)
+#define ICE_LG_ACT_GENERIC_PRIORITY_S	22
+#define ICE_LG_ACT_GENERIC_PRIORITY_M	(0x7 << ICE_LG_ACT_GENERIC_PRIORITY_S)
+#define ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX	7
+
+	/* Action = 7 - Set Stat count */
+#define ICE_LG_ACT_STAT_COUNT		0x7
+#define ICE_LG_ACT_STAT_COUNT_S		3
+#define ICE_LG_ACT_STAT_COUNT_M		(0x7F << ICE_LG_ACT_STAT_COUNT_S)
+	__le32 act[]; /* array of size for actions */
+};
+
+/* Add/Update/Remove VSI list command/response entry
+ * "index" is returned as part of a response to a successful Add command, and
+ * can be used to identify the VSI list for Update/Get/Remove commands.
+ */
+struct ice_sw_rule_vsi_list {
+	__le16 index; /* Index of VSI/Prune list */
+	__le16 number_vsi;
+	__le16 vsi[]; /* Array of number_vsi VSI numbers */
+};
+
+/* Query VSI list command/response entry */
+struct ice_sw_rule_vsi_list_query {
+	__le16 index;
+	DECLARE_BITMAP(vsi_list, ICE_MAX_VSI);
+} __packed;
+
+/* Add switch rule response:
+ * Content of return buffer is same as the input buffer. The status field and
+ * LUT index are updated as part of the response
+ */
+struct ice_aqc_sw_rules_elem {
+	__le16 type; /* Switch rule type, one of T_... */
+#define ICE_AQC_SW_RULES_T_LKUP_RX		0x0
+#define ICE_AQC_SW_RULES_T_LKUP_TX		0x1
+#define ICE_AQC_SW_RULES_T_LG_ACT		0x2
+#define ICE_AQC_SW_RULES_T_VSI_LIST_SET		0x3
+#define ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR	0x4
+#define ICE_AQC_SW_RULES_T_PRUNE_LIST_SET	0x5
+#define ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR	0x6
+	__le16 status;
+	union {
+		struct ice_sw_rule_lkup_rx_tx lkup_tx_rx;
+		struct ice_sw_rule_lg_act lg_act;
+		struct ice_sw_rule_vsi_list vsi_list;
+		struct ice_sw_rule_vsi_list_query vsi_list_query;
+	} __packed pdata;
+};
+
+/* Get Default Topology (indirect 0x0400) */
+struct ice_aqc_get_topo {
+	u8 port_num;
+	u8 num_branches;
+	__le16 reserved1;
+	__le32 reserved2;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Update TSE (indirect 0x0403)
+ * Get TSE (indirect 0x0404)
+ * Add TSE (indirect 0x0401)
+ * Delete TSE (indirect 0x040F)
+ * Move TSE (indirect 0x0408)
+ * Suspend Nodes (indirect 0x0409)
+ * Resume Nodes (indirect 0x040A)
+ */
+struct ice_aqc_sched_elem_cmd {
+	__le16 num_elem_req;	/* Used by commands */
+	__le16 num_elem_resp;	/* Used by responses */
+	__le32 reserved;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_elem_info_bw {
+	__le16 bw_profile_idx;
+	__le16 bw_alloc;
+};
+
+struct ice_aqc_txsched_elem {
+	u8 elem_type; /* Special field, reserved for some aq calls */
+#define ICE_AQC_ELEM_TYPE_UNDEFINED		0x0
+#define ICE_AQC_ELEM_TYPE_ROOT_PORT		0x1
+#define ICE_AQC_ELEM_TYPE_TC			0x2
+#define ICE_AQC_ELEM_TYPE_SE_GENERIC		0x3
+#define ICE_AQC_ELEM_TYPE_ENTRY_POINT		0x4
+#define ICE_AQC_ELEM_TYPE_LEAF			0x5
+#define ICE_AQC_ELEM_TYPE_SE_PADDED		0x6
+	u8 valid_sections;
+#define ICE_AQC_ELEM_VALID_GENERIC		BIT(0)
+#define ICE_AQC_ELEM_VALID_CIR			BIT(1)
+#define ICE_AQC_ELEM_VALID_EIR			BIT(2)
+#define ICE_AQC_ELEM_VALID_SHARED		BIT(3)
+	u8 generic;
+#define ICE_AQC_ELEM_GENERIC_MODE_M		0x1
+#define ICE_AQC_ELEM_GENERIC_PRIO_S		0x1
+#define ICE_AQC_ELEM_GENERIC_PRIO_M	(0x7 << ICE_AQC_ELEM_GENERIC_PRIO_S)
+#define ICE_AQC_ELEM_GENERIC_SP_S		0x4
+#define ICE_AQC_ELEM_GENERIC_SP_M	(0x1 << ICE_AQC_ELEM_GENERIC_SP_S)
+#define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S	0x5
+#define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_M	\
+	(0x3 << ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S)
+	u8 flags; /* Special field, reserved for some aq calls */
+#define ICE_AQC_ELEM_FLAG_SUSPEND_M		0x1
+	struct ice_aqc_elem_info_bw cir_bw;
+	struct ice_aqc_elem_info_bw eir_bw;
+	__le16 srl_id;
+	__le16 reserved2;
+};
+
+struct ice_aqc_txsched_elem_data {
+	__le32 parent_teid;
+	__le32 node_teid;
+	struct ice_aqc_txsched_elem data;
+};
+
+struct ice_aqc_txsched_topo_grp_info_hdr {
+	__le32 parent_teid;
+	__le16 num_elems;
+	__le16 reserved2;
+};
+
+struct ice_aqc_add_elem {
+	struct ice_aqc_txsched_topo_grp_info_hdr hdr;
+	struct ice_aqc_txsched_elem_data generic[];
+};
+
+struct ice_aqc_get_topo_elem {
+	struct ice_aqc_txsched_topo_grp_info_hdr hdr;
+	struct ice_aqc_txsched_elem_data
+		generic[ICE_AQC_TOPO_MAX_LEVEL_NUM];
+};
+
+struct ice_aqc_delete_elem {
+	struct ice_aqc_txsched_topo_grp_info_hdr hdr;
+	__le32 teid[];
+};
+
+/* Query Port ETS (indirect 0x040E)
+ *
+ * This indirect command is used to query port TC node configuration.
+ */
+struct ice_aqc_query_port_ets {
+	__le32 port_teid;
+	__le32 reserved;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_port_ets_elem {
+	u8 tc_valid_bits;
+	u8 reserved[3];
+	/* 3 bits for UP per TC 0-7, 4th byte reserved */
+	__le32 up2tc;
+	u8 tc_bw_share[8];
+	__le32 port_eir_prof_id;
+	__le32 port_cir_prof_id;
+	/* 3 bits per Node priority to TC 0-7, 4th byte reserved */
+	__le32 tc_node_prio;
+#define ICE_TC_NODE_PRIO_S	0x4
+	u8 reserved1[4];
+	__le32 tc_node_teid[8]; /* Used for response, reserved in command */
+};
+
+/* Rate limiting profile for
+ * Add RL profile (indirect 0x0410)
+ * Query RL profile (indirect 0x0411)
+ * Remove RL profile (indirect 0x0415)
+ * These indirect commands acts on single or multiple
+ * RL profiles with specified data.
+ */
+struct ice_aqc_rl_profile {
+	__le16 num_profiles;
+	__le16 num_processed; /* Only for response. Reserved in Command. */
+	u8 reserved[4];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_rl_profile_elem {
+	u8 level;
+	u8 flags;
+#define ICE_AQC_RL_PROFILE_TYPE_S	0x0
+#define ICE_AQC_RL_PROFILE_TYPE_M	(0x3 << ICE_AQC_RL_PROFILE_TYPE_S)
+#define ICE_AQC_RL_PROFILE_TYPE_CIR	0
+#define ICE_AQC_RL_PROFILE_TYPE_EIR	1
+#define ICE_AQC_RL_PROFILE_TYPE_SRL	2
+/* The following flag is used for Query RL Profile Data */
+#define ICE_AQC_RL_PROFILE_INVAL_S	0x7
+#define ICE_AQC_RL_PROFILE_INVAL_M	(0x1 << ICE_AQC_RL_PROFILE_INVAL_S)
+
+	__le16 profile_id;
+	__le16 max_burst_size;
+	__le16 rl_multiply;
+	__le16 wake_up_calc;
+	__le16 rl_encode;
+};
+
+/* Query Scheduler Resource Allocation (indirect 0x0412)
+ * This indirect command retrieves the scheduler resources allocated by
+ * EMP Firmware to the given PF.
+ */
+struct ice_aqc_query_txsched_res {
+	u8 reserved[8];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_generic_sched_props {
+	__le16 phys_levels;
+	__le16 logical_levels;
+	u8 flattening_bitmap;
+	u8 max_device_cgds;
+	u8 max_pf_cgds;
+	u8 rsvd0;
+	__le16 rdma_qsets;
+	u8 rsvd1[22];
+};
+
+struct ice_aqc_layer_props {
+	u8 logical_layer;
+	u8 chunk_size;
+	__le16 max_device_nodes;
+	__le16 max_pf_nodes;
+	u8 rsvd0[4];
+	__le16 max_sibl_grp_sz;
+	__le16 max_cir_rl_profiles;
+	__le16 max_eir_rl_profiles;
+	__le16 max_srl_profiles;
+	u8 rsvd1[14];
+};
+
+struct ice_aqc_query_txsched_res_resp {
+	struct ice_aqc_generic_sched_props sched_props;
+	struct ice_aqc_layer_props layer_props[ICE_AQC_TOPO_MAX_LEVEL_NUM];
+};
+
+/* Get PHY capabilities (indirect 0x0600) */
+struct ice_aqc_get_phy_caps {
+	u8 lport_num;
+	u8 reserved;
+	__le16 param0;
+	/* 18.0 - Report qualified modules */
+#define ICE_AQC_GET_PHY_RQM		BIT(0)
+	/* 18.1 - 18.2 : Report mode
+	 * 00b - Report NVM capabilities
+	 * 01b - Report topology capabilities
+	 * 10b - Report SW configured
+	 */
+#define ICE_AQC_REPORT_MODE_S			1
+#define ICE_AQC_REPORT_MODE_M			(3 << ICE_AQC_REPORT_MODE_S)
+#define ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA	0
+#define ICE_AQC_REPORT_TOPO_CAP_MEDIA		BIT(1)
+#define ICE_AQC_REPORT_ACTIVE_CFG		BIT(2)
+	__le32 reserved1;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* This is #define of PHY type (Extended):
+ * The first set of defines is for phy_type_low.
+ */
+#define ICE_PHY_TYPE_LOW_100BASE_TX		BIT_ULL(0)
+#define ICE_PHY_TYPE_LOW_100M_SGMII		BIT_ULL(1)
+#define ICE_PHY_TYPE_LOW_1000BASE_T		BIT_ULL(2)
+#define ICE_PHY_TYPE_LOW_1000BASE_SX		BIT_ULL(3)
+#define ICE_PHY_TYPE_LOW_1000BASE_LX		BIT_ULL(4)
+#define ICE_PHY_TYPE_LOW_1000BASE_KX		BIT_ULL(5)
+#define ICE_PHY_TYPE_LOW_1G_SGMII		BIT_ULL(6)
+#define ICE_PHY_TYPE_LOW_2500BASE_T		BIT_ULL(7)
+#define ICE_PHY_TYPE_LOW_2500BASE_X		BIT_ULL(8)
+#define ICE_PHY_TYPE_LOW_2500BASE_KX		BIT_ULL(9)
+#define ICE_PHY_TYPE_LOW_5GBASE_T		BIT_ULL(10)
+#define ICE_PHY_TYPE_LOW_5GBASE_KR		BIT_ULL(11)
+#define ICE_PHY_TYPE_LOW_10GBASE_T		BIT_ULL(12)
+#define ICE_PHY_TYPE_LOW_10G_SFI_DA		BIT_ULL(13)
+#define ICE_PHY_TYPE_LOW_10GBASE_SR		BIT_ULL(14)
+#define ICE_PHY_TYPE_LOW_10GBASE_LR		BIT_ULL(15)
+#define ICE_PHY_TYPE_LOW_10GBASE_KR_CR1		BIT_ULL(16)
+#define ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC	BIT_ULL(17)
+#define ICE_PHY_TYPE_LOW_10G_SFI_C2C		BIT_ULL(18)
+#define ICE_PHY_TYPE_LOW_25GBASE_T		BIT_ULL(19)
+#define ICE_PHY_TYPE_LOW_25GBASE_CR		BIT_ULL(20)
+#define ICE_PHY_TYPE_LOW_25GBASE_CR_S		BIT_ULL(21)
+#define ICE_PHY_TYPE_LOW_25GBASE_CR1		BIT_ULL(22)
+#define ICE_PHY_TYPE_LOW_25GBASE_SR		BIT_ULL(23)
+#define ICE_PHY_TYPE_LOW_25GBASE_LR		BIT_ULL(24)
+#define ICE_PHY_TYPE_LOW_25GBASE_KR		BIT_ULL(25)
+#define ICE_PHY_TYPE_LOW_25GBASE_KR_S		BIT_ULL(26)
+#define ICE_PHY_TYPE_LOW_25GBASE_KR1		BIT_ULL(27)
+#define ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC	BIT_ULL(28)
+#define ICE_PHY_TYPE_LOW_25G_AUI_C2C		BIT_ULL(29)
+#define ICE_PHY_TYPE_LOW_40GBASE_CR4		BIT_ULL(30)
+#define ICE_PHY_TYPE_LOW_40GBASE_SR4		BIT_ULL(31)
+#define ICE_PHY_TYPE_LOW_40GBASE_LR4		BIT_ULL(32)
+#define ICE_PHY_TYPE_LOW_40GBASE_KR4		BIT_ULL(33)
+#define ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC	BIT_ULL(34)
+#define ICE_PHY_TYPE_LOW_40G_XLAUI		BIT_ULL(35)
+#define ICE_PHY_TYPE_LOW_50GBASE_CR2		BIT_ULL(36)
+#define ICE_PHY_TYPE_LOW_50GBASE_SR2		BIT_ULL(37)
+#define ICE_PHY_TYPE_LOW_50GBASE_LR2		BIT_ULL(38)
+#define ICE_PHY_TYPE_LOW_50GBASE_KR2		BIT_ULL(39)
+#define ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC	BIT_ULL(40)
+#define ICE_PHY_TYPE_LOW_50G_LAUI2		BIT_ULL(41)
+#define ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC	BIT_ULL(42)
+#define ICE_PHY_TYPE_LOW_50G_AUI2		BIT_ULL(43)
+#define ICE_PHY_TYPE_LOW_50GBASE_CP		BIT_ULL(44)
+#define ICE_PHY_TYPE_LOW_50GBASE_SR		BIT_ULL(45)
+#define ICE_PHY_TYPE_LOW_50GBASE_FR		BIT_ULL(46)
+#define ICE_PHY_TYPE_LOW_50GBASE_LR		BIT_ULL(47)
+#define ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4	BIT_ULL(48)
+#define ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC	BIT_ULL(49)
+#define ICE_PHY_TYPE_LOW_50G_AUI1		BIT_ULL(50)
+#define ICE_PHY_TYPE_LOW_100GBASE_CR4		BIT_ULL(51)
+#define ICE_PHY_TYPE_LOW_100GBASE_SR4		BIT_ULL(52)
+#define ICE_PHY_TYPE_LOW_100GBASE_LR4		BIT_ULL(53)
+#define ICE_PHY_TYPE_LOW_100GBASE_KR4		BIT_ULL(54)
+#define ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC	BIT_ULL(55)
+#define ICE_PHY_TYPE_LOW_100G_CAUI4		BIT_ULL(56)
+#define ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC	BIT_ULL(57)
+#define ICE_PHY_TYPE_LOW_100G_AUI4		BIT_ULL(58)
+#define ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4	BIT_ULL(59)
+#define ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4	BIT_ULL(60)
+#define ICE_PHY_TYPE_LOW_100GBASE_CP2		BIT_ULL(61)
+#define ICE_PHY_TYPE_LOW_100GBASE_SR2		BIT_ULL(62)
+#define ICE_PHY_TYPE_LOW_100GBASE_DR		BIT_ULL(63)
+#define ICE_PHY_TYPE_LOW_MAX_INDEX		63
+/* The second set of defines is for phy_type_high. */
+#define ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4	BIT_ULL(0)
+#define ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC	BIT_ULL(1)
+#define ICE_PHY_TYPE_HIGH_100G_CAUI2		BIT_ULL(2)
+#define ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC	BIT_ULL(3)
+#define ICE_PHY_TYPE_HIGH_100G_AUI2		BIT_ULL(4)
+#define ICE_PHY_TYPE_HIGH_MAX_INDEX		5
+
+struct ice_aqc_get_phy_caps_data {
+	__le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
+	__le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */
+	u8 caps;
+#define ICE_AQC_PHY_EN_TX_LINK_PAUSE			BIT(0)
+#define ICE_AQC_PHY_EN_RX_LINK_PAUSE			BIT(1)
+#define ICE_AQC_PHY_LOW_POWER_MODE			BIT(2)
+#define ICE_AQC_PHY_EN_LINK				BIT(3)
+#define ICE_AQC_PHY_AN_MODE				BIT(4)
+#define ICE_AQC_GET_PHY_EN_MOD_QUAL			BIT(5)
+#define ICE_AQC_PHY_EN_AUTO_FEC				BIT(7)
+#define ICE_AQC_PHY_CAPS_MASK				ICE_M(0xff, 0)
+	u8 low_power_ctrl_an;
+#define ICE_AQC_PHY_EN_D3COLD_LOW_POWER_AUTONEG		BIT(0)
+#define ICE_AQC_PHY_AN_EN_CLAUSE28			BIT(1)
+#define ICE_AQC_PHY_AN_EN_CLAUSE73			BIT(2)
+#define ICE_AQC_PHY_AN_EN_CLAUSE37			BIT(3)
+	__le16 eee_cap;
+#define ICE_AQC_PHY_EEE_EN_100BASE_TX			BIT(0)
+#define ICE_AQC_PHY_EEE_EN_1000BASE_T			BIT(1)
+#define ICE_AQC_PHY_EEE_EN_10GBASE_T			BIT(2)
+#define ICE_AQC_PHY_EEE_EN_1000BASE_KX			BIT(3)
+#define ICE_AQC_PHY_EEE_EN_10GBASE_KR			BIT(4)
+#define ICE_AQC_PHY_EEE_EN_25GBASE_KR			BIT(5)
+#define ICE_AQC_PHY_EEE_EN_40GBASE_KR4			BIT(6)
+	__le16 eeer_value;
+	u8 phy_id_oui[4]; /* PHY/Module ID connected on the port */
+	u8 phy_fw_ver[8];
+	u8 link_fec_options;
+#define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN		BIT(0)
+#define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ		BIT(1)
+#define ICE_AQC_PHY_FEC_25G_RS_528_REQ			BIT(2)
+#define ICE_AQC_PHY_FEC_25G_KR_REQ			BIT(3)
+#define ICE_AQC_PHY_FEC_25G_RS_544_REQ			BIT(4)
+#define ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN		BIT(6)
+#define ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN		BIT(7)
+#define ICE_AQC_PHY_FEC_MASK				ICE_M(0xdf, 0)
+	u8 module_compliance_enforcement;
+#define ICE_AQC_MOD_ENFORCE_STRICT_MODE			BIT(0)
+	u8 extended_compliance_code;
+#define ICE_MODULE_TYPE_TOTAL_BYTE			3
+	u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE];
+#define ICE_AQC_MOD_TYPE_BYTE0_SFP_PLUS			0xA0
+#define ICE_AQC_MOD_TYPE_BYTE0_QSFP_PLUS		0x80
+#define ICE_AQC_MOD_TYPE_IDENT				1
+#define ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE	BIT(0)
+#define ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE	BIT(1)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_SR		BIT(4)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_LR		BIT(5)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_LRM		BIT(6)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_ER		BIT(7)
+#define ICE_AQC_MOD_TYPE_BYTE2_SFP_PLUS			0xA0
+#define ICE_AQC_MOD_TYPE_BYTE2_QSFP_PLUS		0x86
+	u8 qualified_module_count;
+	u8 rsvd2[7];	/* Bytes 47:41 reserved */
+#define ICE_AQC_QUAL_MOD_COUNT_MAX			16
+	struct {
+		u8 v_oui[3];
+		u8 rsvd3;
+		u8 v_part[16];
+		__le32 v_rev;
+		__le64 rsvd4;
+	} qual_modules[ICE_AQC_QUAL_MOD_COUNT_MAX];
+};
+
+/* Set PHY capabilities (direct 0x0601)
+ * NOTE: This command must be followed by setup link and restart auto-neg
+ */
+struct ice_aqc_set_phy_cfg {
+	u8 lport_num;
+	u8 reserved[7];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Set PHY config command data structure */
+struct ice_aqc_set_phy_cfg_data {
+	__le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
+	__le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */
+	u8 caps;
+#define ICE_AQ_PHY_ENA_VALID_MASK	ICE_M(0xef, 0)
+#define ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY	BIT(0)
+#define ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY	BIT(1)
+#define ICE_AQ_PHY_ENA_LOW_POWER	BIT(2)
+#define ICE_AQ_PHY_ENA_LINK		BIT(3)
+#define ICE_AQ_PHY_ENA_AUTO_LINK_UPDT	BIT(5)
+#define ICE_AQ_PHY_ENA_LESM		BIT(6)
+#define ICE_AQ_PHY_ENA_AUTO_FEC		BIT(7)
+	u8 low_power_ctrl_an;
+	__le16 eee_cap; /* Value from ice_aqc_get_phy_caps */
+	__le16 eeer_value;
+	u8 link_fec_opt; /* Use defines from ice_aqc_get_phy_caps */
+	u8 module_compliance_enforcement;
+};
+
+/* Set MAC Config command data structure (direct 0x0603) */
+struct ice_aqc_set_mac_cfg {
+	__le16 max_frame_size;
+	u8 params;
+#define ICE_AQ_SET_MAC_PACE_S		3
+#define ICE_AQ_SET_MAC_PACE_M		(0xF << ICE_AQ_SET_MAC_PACE_S)
+#define ICE_AQ_SET_MAC_PACE_TYPE_M	BIT(7)
+#define ICE_AQ_SET_MAC_PACE_TYPE_RATE	0
+#define ICE_AQ_SET_MAC_PACE_TYPE_FIXED	ICE_AQ_SET_MAC_PACE_TYPE_M
+	u8 tx_tmr_priority;
+	__le16 tx_tmr_value;
+	__le16 fc_refresh_threshold;
+	u8 drop_opts;
+#define ICE_AQ_SET_MAC_AUTO_DROP_MASK		BIT(0)
+#define ICE_AQ_SET_MAC_AUTO_DROP_NONE		0
+#define ICE_AQ_SET_MAC_AUTO_DROP_BLOCKING_PKTS	BIT(0)
+	u8 reserved[7];
+};
+
+/* Restart AN command data structure (direct 0x0605)
+ * Also used for response, with only the lport_num field present.
+ */
+struct ice_aqc_restart_an {
+	u8 lport_num;
+	u8 reserved;
+	u8 cmd_flags;
+#define ICE_AQC_RESTART_AN_LINK_RESTART	BIT(1)
+#define ICE_AQC_RESTART_AN_LINK_ENABLE	BIT(2)
+	u8 reserved2[13];
+};
+
+/* Get link status (indirect 0x0607), also used for Link Status Event */
+struct ice_aqc_get_link_status {
+	u8 lport_num;
+	u8 reserved;
+	__le16 cmd_flags;
+#define ICE_AQ_LSE_M			0x3
+#define ICE_AQ_LSE_NOP			0x0
+#define ICE_AQ_LSE_DIS			0x2
+#define ICE_AQ_LSE_ENA			0x3
+	/* only response uses this flag */
+#define ICE_AQ_LSE_IS_ENABLED		0x1
+	__le32 reserved2;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Get link status response data structure, also used for Link Status Event */
+struct ice_aqc_get_link_status_data {
+	u8 topo_media_conflict;
+#define ICE_AQ_LINK_TOPO_CONFLICT	BIT(0)
+#define ICE_AQ_LINK_MEDIA_CONFLICT	BIT(1)
+#define ICE_AQ_LINK_TOPO_CORRUPT	BIT(2)
+#define ICE_AQ_LINK_TOPO_UNREACH_PRT	BIT(4)
+#define ICE_AQ_LINK_TOPO_UNDRUTIL_PRT	BIT(5)
+#define ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA	BIT(6)
+#define ICE_AQ_LINK_TOPO_UNSUPP_MEDIA	BIT(7)
+	u8 reserved1;
+	u8 link_info;
+#define ICE_AQ_LINK_UP			BIT(0)	/* Link Status */
+#define ICE_AQ_LINK_FAULT		BIT(1)
+#define ICE_AQ_LINK_FAULT_TX		BIT(2)
+#define ICE_AQ_LINK_FAULT_RX		BIT(3)
+#define ICE_AQ_LINK_FAULT_REMOTE	BIT(4)
+#define ICE_AQ_LINK_UP_PORT		BIT(5)	/* External Port Link Status */
+#define ICE_AQ_MEDIA_AVAILABLE		BIT(6)
+#define ICE_AQ_SIGNAL_DETECT		BIT(7)
+	u8 an_info;
+#define ICE_AQ_AN_COMPLETED		BIT(0)
+#define ICE_AQ_LP_AN_ABILITY		BIT(1)
+#define ICE_AQ_PD_FAULT			BIT(2)	/* Parallel Detection Fault */
+#define ICE_AQ_FEC_EN			BIT(3)
+#define ICE_AQ_PHY_LOW_POWER		BIT(4)	/* Low Power State */
+#define ICE_AQ_LINK_PAUSE_TX		BIT(5)
+#define ICE_AQ_LINK_PAUSE_RX		BIT(6)
+#define ICE_AQ_QUALIFIED_MODULE		BIT(7)
+	u8 ext_info;
+#define ICE_AQ_LINK_PHY_TEMP_ALARM	BIT(0)
+#define ICE_AQ_LINK_EXCESSIVE_ERRORS	BIT(1)	/* Excessive Link Errors */
+	/* Port Tx Suspended */
+#define ICE_AQ_LINK_TX_S		2
+#define ICE_AQ_LINK_TX_M		(0x03 << ICE_AQ_LINK_TX_S)
+#define ICE_AQ_LINK_TX_ACTIVE		0
+#define ICE_AQ_LINK_TX_DRAINED		1
+#define ICE_AQ_LINK_TX_FLUSHED		3
+	u8 reserved2;
+	__le16 max_frame_size;
+	u8 cfg;
+#define ICE_AQ_LINK_25G_KR_FEC_EN	BIT(0)
+#define ICE_AQ_LINK_25G_RS_528_FEC_EN	BIT(1)
+#define ICE_AQ_LINK_25G_RS_544_FEC_EN	BIT(2)
+#define ICE_AQ_FEC_MASK			ICE_M(0x7, 0)
+	/* Pacing Config */
+#define ICE_AQ_CFG_PACING_S		3
+#define ICE_AQ_CFG_PACING_M		(0xF << ICE_AQ_CFG_PACING_S)
+#define ICE_AQ_CFG_PACING_TYPE_M	BIT(7)
+#define ICE_AQ_CFG_PACING_TYPE_AVG	0
+#define ICE_AQ_CFG_PACING_TYPE_FIXED	ICE_AQ_CFG_PACING_TYPE_M
+	/* External Device Power Ability */
+	u8 power_desc;
+#define ICE_AQ_PWR_CLASS_M		0x3
+#define ICE_AQ_LINK_PWR_BASET_LOW_HIGH	0
+#define ICE_AQ_LINK_PWR_BASET_HIGH	1
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_1	0
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_2	1
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_3	2
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_4	3
+	__le16 link_speed;
+#define ICE_AQ_LINK_SPEED_M		0x7FF
+#define ICE_AQ_LINK_SPEED_10MB		BIT(0)
+#define ICE_AQ_LINK_SPEED_100MB		BIT(1)
+#define ICE_AQ_LINK_SPEED_1000MB	BIT(2)
+#define ICE_AQ_LINK_SPEED_2500MB	BIT(3)
+#define ICE_AQ_LINK_SPEED_5GB		BIT(4)
+#define ICE_AQ_LINK_SPEED_10GB		BIT(5)
+#define ICE_AQ_LINK_SPEED_20GB		BIT(6)
+#define ICE_AQ_LINK_SPEED_25GB		BIT(7)
+#define ICE_AQ_LINK_SPEED_40GB		BIT(8)
+#define ICE_AQ_LINK_SPEED_50GB		BIT(9)
+#define ICE_AQ_LINK_SPEED_100GB		BIT(10)
+#define ICE_AQ_LINK_SPEED_UNKNOWN	BIT(15)
+	__le32 reserved3; /* Aligns next field to 8-byte boundary */
+	__le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
+	__le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */
+};
+
+/* Set event mask command (direct 0x0613) */
+struct ice_aqc_set_event_mask {
+	u8	lport_num;
+	u8	reserved[7];
+	__le16	event_mask;
+#define ICE_AQ_LINK_EVENT_UPDOWN		BIT(1)
+#define ICE_AQ_LINK_EVENT_MEDIA_NA		BIT(2)
+#define ICE_AQ_LINK_EVENT_LINK_FAULT		BIT(3)
+#define ICE_AQ_LINK_EVENT_PHY_TEMP_ALARM	BIT(4)
+#define ICE_AQ_LINK_EVENT_EXCESSIVE_ERRORS	BIT(5)
+#define ICE_AQ_LINK_EVENT_SIGNAL_DETECT		BIT(6)
+#define ICE_AQ_LINK_EVENT_AN_COMPLETED		BIT(7)
+#define ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL	BIT(8)
+#define ICE_AQ_LINK_EVENT_PORT_TX_SUSPENDED	BIT(9)
+	u8	reserved1[6];
+};
+
+/* Set MAC Loopback command (direct 0x0620) */
+struct ice_aqc_set_mac_lb {
+	u8 lb_mode;
+#define ICE_AQ_MAC_LB_EN		BIT(0)
+#define ICE_AQ_MAC_LB_OSC_CLK		BIT(1)
+	u8 reserved[15];
+};
+
+struct ice_aqc_link_topo_addr {
+	u8 lport_num;
+	u8 lport_num_valid;
+#define ICE_AQC_LINK_TOPO_PORT_NUM_VALID	BIT(0)
+	u8 node_type_ctx;
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_S		0
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_M	(0xF << ICE_AQC_LINK_TOPO_NODE_TYPE_S)
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_PHY		0
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_GPIO_CTRL	1
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_MUX_CTRL	2
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_LED_CTRL	3
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_LED		4
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_THERMAL	5
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_CAGE	6
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_MEZZ	7
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_ID_EEPROM	8
+#define ICE_AQC_LINK_TOPO_NODE_CTX_S		4
+#define ICE_AQC_LINK_TOPO_NODE_CTX_M		\
+				(0xF << ICE_AQC_LINK_TOPO_NODE_CTX_S)
+#define ICE_AQC_LINK_TOPO_NODE_CTX_GLOBAL	0
+#define ICE_AQC_LINK_TOPO_NODE_CTX_BOARD	1
+#define ICE_AQC_LINK_TOPO_NODE_CTX_PORT		2
+#define ICE_AQC_LINK_TOPO_NODE_CTX_NODE		3
+#define ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED	4
+#define ICE_AQC_LINK_TOPO_NODE_CTX_OVERRIDE	5
+	u8 index;
+	__le16 handle;
+#define ICE_AQC_LINK_TOPO_HANDLE_S	0
+#define ICE_AQC_LINK_TOPO_HANDLE_M	(0x3FF << ICE_AQC_LINK_TOPO_HANDLE_S)
+/* Used to decode the handle field */
+#define ICE_AQC_LINK_TOPO_HANDLE_BRD_TYPE_M	BIT(9)
+#define ICE_AQC_LINK_TOPO_HANDLE_BRD_TYPE_LOM	BIT(9)
+#define ICE_AQC_LINK_TOPO_HANDLE_BRD_TYPE_MEZZ	0
+#define ICE_AQC_LINK_TOPO_HANDLE_NODE_S		0
+/* In case of a Mezzanine type */
+#define ICE_AQC_LINK_TOPO_HANDLE_MEZZ_NODE_M	\
+				(0x3F << ICE_AQC_LINK_TOPO_HANDLE_NODE_S)
+#define ICE_AQC_LINK_TOPO_HANDLE_MEZZ_S	6
+#define ICE_AQC_LINK_TOPO_HANDLE_MEZZ_M	(0x7 << ICE_AQC_LINK_TOPO_HANDLE_MEZZ_S)
+/* In case of a LOM type */
+#define ICE_AQC_LINK_TOPO_HANDLE_LOM_NODE_M	\
+				(0x1FF << ICE_AQC_LINK_TOPO_HANDLE_NODE_S)
+};
+
+/* Get Link Topology Handle (direct, 0x06E0) */
+struct ice_aqc_get_link_topo {
+	struct ice_aqc_link_topo_addr addr;
+	u8 node_part_num;
+	u8 rsvd[9];
+};
+
+/* Set Port Identification LED (direct, 0x06E9) */
+struct ice_aqc_set_port_id_led {
+	u8 lport_num;
+	u8 lport_num_valid;
+	u8 ident_mode;
+#define ICE_AQC_PORT_IDENT_LED_BLINK	BIT(0)
+#define ICE_AQC_PORT_IDENT_LED_ORIG	0
+	u8 rsvd[13];
+};
+
+/* Read/Write SFF EEPROM command (indirect 0x06EE) */
+struct ice_aqc_sff_eeprom {
+	u8 lport_num;
+	u8 lport_num_valid;
+#define ICE_AQC_SFF_PORT_NUM_VALID	BIT(0)
+	__le16 i2c_bus_addr;
+#define ICE_AQC_SFF_I2CBUS_7BIT_M	0x7F
+#define ICE_AQC_SFF_I2CBUS_10BIT_M	0x3FF
+#define ICE_AQC_SFF_I2CBUS_TYPE_M	BIT(10)
+#define ICE_AQC_SFF_I2CBUS_TYPE_7BIT	0
+#define ICE_AQC_SFF_I2CBUS_TYPE_10BIT	ICE_AQC_SFF_I2CBUS_TYPE_M
+#define ICE_AQC_SFF_SET_EEPROM_PAGE_S	11
+#define ICE_AQC_SFF_SET_EEPROM_PAGE_M	(0x3 << ICE_AQC_SFF_SET_EEPROM_PAGE_S)
+#define ICE_AQC_SFF_NO_PAGE_CHANGE	0
+#define ICE_AQC_SFF_SET_23_ON_MISMATCH	1
+#define ICE_AQC_SFF_SET_22_ON_MISMATCH	2
+#define ICE_AQC_SFF_IS_WRITE		BIT(15)
+	__le16 i2c_mem_addr;
+	__le16 eeprom_page;
+#define  ICE_AQC_SFF_EEPROM_BANK_S 0
+#define  ICE_AQC_SFF_EEPROM_BANK_M (0xFF << ICE_AQC_SFF_EEPROM_BANK_S)
+#define  ICE_AQC_SFF_EEPROM_PAGE_S 8
+#define  ICE_AQC_SFF_EEPROM_PAGE_M (0xFF << ICE_AQC_SFF_EEPROM_PAGE_S)
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* NVM Read command (indirect 0x0701)
+ * NVM Erase commands (direct 0x0702)
+ * NVM Update commands (indirect 0x0703)
+ */
+struct ice_aqc_nvm {
+#define ICE_AQC_NVM_MAX_OFFSET		0xFFFFFF
+	__le16 offset_low;
+	u8 offset_high;
+	u8 cmd_flags;
+#define ICE_AQC_NVM_LAST_CMD		BIT(0)
+#define ICE_AQC_NVM_PCIR_REQ		BIT(0)	/* Used by NVM Update reply */
+#define ICE_AQC_NVM_PRESERVATION_S	1
+#define ICE_AQC_NVM_PRESERVATION_M	(3 << ICE_AQC_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_NO_PRESERVATION	(0 << ICE_AQC_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_PRESERVE_ALL	BIT(1)
+#define ICE_AQC_NVM_FACTORY_DEFAULT	(2 << ICE_AQC_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_PRESERVE_SELECTED	(3 << ICE_AQC_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_ACTIV_SEL_NVM	BIT(3) /* Write Activate/SR Dump only */
+#define ICE_AQC_NVM_ACTIV_SEL_OROM	BIT(4)
+#define ICE_AQC_NVM_ACTIV_SEL_NETLIST	BIT(5)
+#define ICE_AQC_NVM_SPECIAL_UPDATE	BIT(6)
+#define ICE_AQC_NVM_REVERT_LAST_ACTIV	BIT(6) /* Write Activate only */
+#define ICE_AQC_NVM_ACTIV_SEL_MASK	ICE_M(0x7, 3)
+#define ICE_AQC_NVM_FLASH_ONLY		BIT(7)
+	__le16 module_typeid;
+	__le16 length;
+#define ICE_AQC_NVM_ERASE_LEN	0xFFFF
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+#define ICE_AQC_NVM_START_POINT			0
+
+/* NVM Checksum Command (direct, 0x0706) */
+struct ice_aqc_nvm_checksum {
+	u8 flags;
+#define ICE_AQC_NVM_CHECKSUM_VERIFY	BIT(0)
+#define ICE_AQC_NVM_CHECKSUM_RECALC	BIT(1)
+	u8 rsvd;
+	__le16 checksum; /* Used only by response */
+#define ICE_AQC_NVM_CHECKSUM_CORRECT	0xBABA
+	u8 rsvd2[12];
+};
+
+/* The result of netlist NVM read comes in a TLV format. The actual data
+ * (netlist header) starts from word offset 1 (byte 2). The FW strips
+ * out the type field from the TLV header so all the netlist fields
+ * should adjust their offset value by 1 word (2 bytes) in order to map
+ * their correct location.
+ */
+#define ICE_AQC_NVM_LINK_TOPO_NETLIST_MOD_ID		0x11B
+#define ICE_AQC_NVM_LINK_TOPO_NETLIST_LEN_OFFSET	1
+#define ICE_AQC_NVM_LINK_TOPO_NETLIST_LEN		2 /* In bytes */
+#define ICE_AQC_NVM_NETLIST_NODE_COUNT_OFFSET		2
+#define ICE_AQC_NVM_NETLIST_NODE_COUNT_LEN		2 /* In bytes */
+#define ICE_AQC_NVM_NETLIST_NODE_COUNT_M		ICE_M(0x3FF, 0)
+#define ICE_AQC_NVM_NETLIST_ID_BLK_START_OFFSET		5
+#define ICE_AQC_NVM_NETLIST_ID_BLK_LEN			0x30 /* In words */
+
+/* netlist ID block field offsets (word offsets) */
+#define ICE_AQC_NVM_NETLIST_ID_BLK_MAJOR_VER_LOW	2
+#define ICE_AQC_NVM_NETLIST_ID_BLK_MAJOR_VER_HIGH	3
+#define ICE_AQC_NVM_NETLIST_ID_BLK_MINOR_VER_LOW	4
+#define ICE_AQC_NVM_NETLIST_ID_BLK_MINOR_VER_HIGH	5
+#define ICE_AQC_NVM_NETLIST_ID_BLK_TYPE_LOW		6
+#define ICE_AQC_NVM_NETLIST_ID_BLK_TYPE_HIGH		7
+#define ICE_AQC_NVM_NETLIST_ID_BLK_REV_LOW		8
+#define ICE_AQC_NVM_NETLIST_ID_BLK_REV_HIGH		9
+#define ICE_AQC_NVM_NETLIST_ID_BLK_SHA_HASH		0xA
+#define ICE_AQC_NVM_NETLIST_ID_BLK_CUST_VER		0x2F
+
+/* Used for NVM Set Package Data command - 0x070A */
+struct ice_aqc_nvm_pkg_data {
+	u8 reserved[3];
+	u8 cmd_flags;
+#define ICE_AQC_NVM_PKG_DELETE		BIT(0) /* used for command call */
+#define ICE_AQC_NVM_PKG_SKIPPED		BIT(0) /* used for command response */
+
+	u32 reserved1;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Used for Pass Component Table command - 0x070B */
+struct ice_aqc_nvm_pass_comp_tbl {
+	u8 component_response; /* Response only */
+#define ICE_AQ_NVM_PASS_COMP_CAN_BE_UPDATED		0x0
+#define ICE_AQ_NVM_PASS_COMP_CAN_MAY_BE_UPDATEABLE	0x1
+#define ICE_AQ_NVM_PASS_COMP_CAN_NOT_BE_UPDATED		0x2
+	u8 component_response_code; /* Response only */
+#define ICE_AQ_NVM_PASS_COMP_CAN_BE_UPDATED_CODE	0x0
+#define ICE_AQ_NVM_PASS_COMP_STAMP_IDENTICAL_CODE	0x1
+#define ICE_AQ_NVM_PASS_COMP_STAMP_LOWER		0x2
+#define ICE_AQ_NVM_PASS_COMP_INVALID_STAMP_CODE		0x3
+#define ICE_AQ_NVM_PASS_COMP_CONFLICT_CODE		0x4
+#define ICE_AQ_NVM_PASS_COMP_PRE_REQ_NOT_MET_CODE	0x5
+#define ICE_AQ_NVM_PASS_COMP_NOT_SUPPORTED_CODE		0x6
+#define ICE_AQ_NVM_PASS_COMP_CANNOT_DOWNGRADE_CODE	0x7
+#define ICE_AQ_NVM_PASS_COMP_INCOMPLETE_IMAGE_CODE	0x8
+#define ICE_AQ_NVM_PASS_COMP_VER_STR_IDENTICAL_CODE	0xA
+#define ICE_AQ_NVM_PASS_COMP_VER_STR_LOWER_CODE		0xB
+	u8 reserved;
+	u8 transfer_flag;
+#define ICE_AQ_NVM_PASS_COMP_TBL_START			0x1
+#define ICE_AQ_NVM_PASS_COMP_TBL_MIDDLE			0x2
+#define ICE_AQ_NVM_PASS_COMP_TBL_END			0x4
+#define ICE_AQ_NVM_PASS_COMP_TBL_START_AND_END		0x5
+	__le32 reserved1;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_nvm_comp_tbl {
+	__le16 comp_class;
+#define NVM_COMP_CLASS_ALL_FW	0x000A
+
+	__le16 comp_id;
+#define NVM_COMP_ID_OROM	0x5
+#define NVM_COMP_ID_NVM		0x6
+#define NVM_COMP_ID_NETLIST	0x8
+
+	u8 comp_class_idx;
+#define FWU_COMP_CLASS_IDX_NOT_USE 0x0
+
+	__le32 comp_cmp_stamp;
+	u8 cvs_type;
+#define NVM_CVS_TYPE_ASCII	0x1
+
+	u8 cvs_len;
+	u8 cvs[]; /* Component Version String */
+} __packed;
+
+/*
+ * Send to PF command (indirect 0x0801) ID is only used by PF
+ *
+ * Send to VF command (indirect 0x0802) ID is only used by PF
+ *
+ */
+struct ice_aqc_pf_vf_msg {
+	__le32 id;
+	u32 reserved;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Get LLDP MIB (indirect 0x0A00)
+ * Note: This is also used by the LLDP MIB Change Event (0x0A01)
+ * as the format is the same.
+ */
+struct ice_aqc_lldp_get_mib {
+	u8 type;
+#define ICE_AQ_LLDP_MIB_TYPE_S			0
+#define ICE_AQ_LLDP_MIB_TYPE_M			(0x3 << ICE_AQ_LLDP_MIB_TYPE_S)
+#define ICE_AQ_LLDP_MIB_LOCAL			0
+#define ICE_AQ_LLDP_MIB_REMOTE			1
+#define ICE_AQ_LLDP_MIB_LOCAL_AND_REMOTE	2
+#define ICE_AQ_LLDP_BRID_TYPE_S			2
+#define ICE_AQ_LLDP_BRID_TYPE_M			(0x3 << ICE_AQ_LLDP_BRID_TYPE_S)
+#define ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID	0
+#define ICE_AQ_LLDP_BRID_TYPE_NON_TPMR		1
+/* Tx pause flags in the 0xA01 event use ICE_AQ_LLDP_TX_* */
+#define ICE_AQ_LLDP_TX_S			0x4
+#define ICE_AQ_LLDP_TX_M			(0x03 << ICE_AQ_LLDP_TX_S)
+#define ICE_AQ_LLDP_TX_ACTIVE			0
+#define ICE_AQ_LLDP_TX_SUSPENDED		1
+#define ICE_AQ_LLDP_TX_FLUSHED			3
+/* The following bytes are reserved for the Get LLDP MIB command (0x0A00)
+ * and in the LLDP MIB Change Event (0x0A01). They are valid for the
+ * Get LLDP MIB (0x0A00) response only.
+ */
+	u8 reserved1;
+	__le16 local_len;
+	__le16 remote_len;
+	u8 reserved2[2];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Configure LLDP MIB Change Event (direct 0x0A01) */
+/* For MIB Change Event use ice_aqc_lldp_get_mib structure above */
+struct ice_aqc_lldp_set_mib_change {
+	u8 command;
+#define ICE_AQ_LLDP_MIB_UPDATE_ENABLE		0x0
+#define ICE_AQ_LLDP_MIB_UPDATE_DIS		0x1
+	u8 reserved[15];
+};
+
+/* Stop LLDP (direct 0x0A05) */
+struct ice_aqc_lldp_stop {
+	u8 command;
+#define ICE_AQ_LLDP_AGENT_STATE_MASK	BIT(0)
+#define ICE_AQ_LLDP_AGENT_STOP		0x0
+#define ICE_AQ_LLDP_AGENT_SHUTDOWN	ICE_AQ_LLDP_AGENT_STATE_MASK
+#define ICE_AQ_LLDP_AGENT_PERSIST_DIS	BIT(1)
+	u8 reserved[15];
+};
+
+/* Start LLDP (direct 0x0A06) */
+struct ice_aqc_lldp_start {
+	u8 command;
+#define ICE_AQ_LLDP_AGENT_START		BIT(0)
+#define ICE_AQ_LLDP_AGENT_PERSIST_ENA	BIT(1)
+	u8 reserved[15];
+};
+
+/* Get CEE DCBX Oper Config (0x0A07)
+ * The command uses the generic descriptor struct and
+ * returns the struct below as an indirect response.
+ */
+struct ice_aqc_get_cee_dcb_cfg_resp {
+	u8 oper_num_tc;
+	u8 oper_prio_tc[4];
+	u8 oper_tc_bw[8];
+	u8 oper_pfc_en;
+	__le16 oper_app_prio;
+#define ICE_AQC_CEE_APP_FCOE_S		0
+#define ICE_AQC_CEE_APP_FCOE_M		(0x7 << ICE_AQC_CEE_APP_FCOE_S)
+#define ICE_AQC_CEE_APP_ISCSI_S		3
+#define ICE_AQC_CEE_APP_ISCSI_M		(0x7 << ICE_AQC_CEE_APP_ISCSI_S)
+#define ICE_AQC_CEE_APP_FIP_S		8
+#define ICE_AQC_CEE_APP_FIP_M		(0x7 << ICE_AQC_CEE_APP_FIP_S)
+	__le32 tlv_status;
+#define ICE_AQC_CEE_PG_STATUS_S		0
+#define ICE_AQC_CEE_PG_STATUS_M		(0x7 << ICE_AQC_CEE_PG_STATUS_S)
+#define ICE_AQC_CEE_PFC_STATUS_S	3
+#define ICE_AQC_CEE_PFC_STATUS_M	(0x7 << ICE_AQC_CEE_PFC_STATUS_S)
+#define ICE_AQC_CEE_FCOE_STATUS_S	8
+#define ICE_AQC_CEE_FCOE_STATUS_M	(0x7 << ICE_AQC_CEE_FCOE_STATUS_S)
+#define ICE_AQC_CEE_ISCSI_STATUS_S	11
+#define ICE_AQC_CEE_ISCSI_STATUS_M	(0x7 << ICE_AQC_CEE_ISCSI_STATUS_S)
+#define ICE_AQC_CEE_FIP_STATUS_S	16
+#define ICE_AQC_CEE_FIP_STATUS_M	(0x7 << ICE_AQC_CEE_FIP_STATUS_S)
+	u8 reserved[12];
+};
+
+/* Set Local LLDP MIB (indirect 0x0A08)
+ * Used to replace the local MIB of a given LLDP agent. e.g. DCBX
+ */
+struct ice_aqc_lldp_set_local_mib {
+	u8 type;
+#define SET_LOCAL_MIB_TYPE_DCBX_M		BIT(0)
+#define SET_LOCAL_MIB_TYPE_LOCAL_MIB		0
+#define SET_LOCAL_MIB_TYPE_CEE_M		BIT(1)
+#define SET_LOCAL_MIB_TYPE_CEE_WILLING		0
+#define SET_LOCAL_MIB_TYPE_CEE_NON_WILLING	SET_LOCAL_MIB_TYPE_CEE_M
+	u8 reserved0;
+	__le16 length;
+	u8 reserved1[4];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Stop/Start LLDP Agent (direct 0x0A09)
+ * Used for stopping/starting specific LLDP agent. e.g. DCBX.
+ * The same structure is used for the response, with the command field
+ * being used as the status field.
+ */
+struct ice_aqc_lldp_stop_start_specific_agent {
+	u8 command;
+#define ICE_AQC_START_STOP_AGENT_M		BIT(0)
+#define ICE_AQC_START_STOP_AGENT_STOP_DCBX	0
+#define ICE_AQC_START_STOP_AGENT_START_DCBX	ICE_AQC_START_STOP_AGENT_M
+	u8 reserved[15];
+};
+
+/* Get/Set RSS key (indirect 0x0B04/0x0B02) */
+struct ice_aqc_get_set_rss_key {
+#define ICE_AQC_GSET_RSS_KEY_VSI_VALID	BIT(15)
+#define ICE_AQC_GSET_RSS_KEY_VSI_ID_S	0
+#define ICE_AQC_GSET_RSS_KEY_VSI_ID_M	(0x3FF << ICE_AQC_GSET_RSS_KEY_VSI_ID_S)
+	__le16 vsi_id;
+	u8 reserved[6];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+#define ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE	0x28
+#define ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE	0xC
+#define ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE \
+				(ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE + \
+				 ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE)
+
+struct ice_aqc_get_set_rss_keys {
+	u8 standard_rss_key[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE];
+	u8 extended_hash_key[ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE];
+};
+
+/* Get/Set RSS LUT (indirect 0x0B05/0x0B03) */
+struct ice_aqc_get_set_rss_lut {
+#define ICE_AQC_GSET_RSS_LUT_VSI_VALID	BIT(15)
+#define ICE_AQC_GSET_RSS_LUT_VSI_ID_S	0
+#define ICE_AQC_GSET_RSS_LUT_VSI_ID_M	(0x3FF << ICE_AQC_GSET_RSS_LUT_VSI_ID_S)
+	__le16 vsi_id;
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S	0
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_M	\
+				(0x3 << ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S)
+
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI	 0
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF	 1
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL	 2
+
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S	 2
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M	 \
+				(0x3 << ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S)
+
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128	 128
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128_FLAG 0
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512	 512
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512_FLAG 1
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K	 2048
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG	 2
+
+#define ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S	 4
+#define ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_M	 \
+				(0xF << ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S)
+
+	__le16 flags;
+	__le32 reserved;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Add Tx LAN Queues (indirect 0x0C30) */
+struct ice_aqc_add_txqs {
+	u8 num_qgrps;
+	u8 reserved[3];
+	__le32 reserved1;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* This is the descriptor of each queue entry for the Add Tx LAN Queues
+ * command (0x0C30). Only used within struct ice_aqc_add_tx_qgrp.
+ */
+struct ice_aqc_add_txqs_perq {
+	__le16 txq_id;
+	u8 rsvd[2];
+	__le32 q_teid;
+	u8 txq_ctx[22];
+	u8 rsvd2[2];
+	struct ice_aqc_txsched_elem info;
+};
+
+/* The format of the command buffer for Add Tx LAN Queues (0x0C30)
+ * is an array of the following structs. Please note that the length of
+ * each struct ice_aqc_add_tx_qgrp is variable due
+ * to the variable number of queues in each group!
+ */
+struct ice_aqc_add_tx_qgrp {
+	__le32 parent_teid;
+	u8 num_txqs;
+	u8 rsvd[3];
+	struct ice_aqc_add_txqs_perq txqs[];
+};
+
+/* Disable Tx LAN Queues (indirect 0x0C31) */
+struct ice_aqc_dis_txqs {
+	u8 cmd_type;
+#define ICE_AQC_Q_DIS_CMD_S		0
+#define ICE_AQC_Q_DIS_CMD_M		(0x3 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_NO_FUNC_RESET	(0 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_VM_RESET	BIT(ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_VF_RESET	(2 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_PF_RESET	(3 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_SUBSEQ_CALL	BIT(2)
+#define ICE_AQC_Q_DIS_CMD_FLUSH_PIPE	BIT(3)
+	u8 num_entries;
+	__le16 vmvf_and_timeout;
+#define ICE_AQC_Q_DIS_VMVF_NUM_S	0
+#define ICE_AQC_Q_DIS_VMVF_NUM_M	(0x3FF << ICE_AQC_Q_DIS_VMVF_NUM_S)
+#define ICE_AQC_Q_DIS_TIMEOUT_S		10
+#define ICE_AQC_Q_DIS_TIMEOUT_M		(0x3F << ICE_AQC_Q_DIS_TIMEOUT_S)
+	__le32 blocked_cgds;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* The buffer for Disable Tx LAN Queues (indirect 0x0C31)
+ * contains the following structures, arrayed one after the
+ * other.
+ * Note: Since the q_id is 16 bits wide, if the
+ * number of queues is even, then 2 bytes of alignment MUST be
+ * added before the start of the next group, to allow correct
+ * alignment of the parent_teid field.
+ */
+struct ice_aqc_dis_txq_item {
+	__le32 parent_teid;
+	u8 num_qs;
+	u8 rsvd;
+	/* The length of the q_id array varies according to num_qs */
+#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S		15
+#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_LAN_Q	\
+			(0 << ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S)
+#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_RDMA_QSET	\
+			(1 << ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S)
+	__le16 q_id[];
+} __packed;
+
+/* Configure Firmware Logging Command (indirect 0xFF09)
+ * Logging Information Read Response (indirect 0xFF10)
+ * Note: The 0xFF10 command has no input parameters.
+ */
+struct ice_aqc_fw_logging {
+	u8 log_ctrl;
+#define ICE_AQC_FW_LOG_AQ_EN		BIT(0)
+#define ICE_AQC_FW_LOG_UART_EN		BIT(1)
+	u8 rsvd0;
+	u8 log_ctrl_valid; /* Not used by 0xFF10 Response */
+#define ICE_AQC_FW_LOG_AQ_VALID		BIT(0)
+#define ICE_AQC_FW_LOG_UART_VALID	BIT(1)
+	u8 rsvd1[5];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+enum ice_aqc_fw_logging_mod {
+	ICE_AQC_FW_LOG_ID_GENERAL = 0,
+	ICE_AQC_FW_LOG_ID_CTRL,
+	ICE_AQC_FW_LOG_ID_LINK,
+	ICE_AQC_FW_LOG_ID_LINK_TOPO,
+	ICE_AQC_FW_LOG_ID_DNL,
+	ICE_AQC_FW_LOG_ID_I2C,
+	ICE_AQC_FW_LOG_ID_SDP,
+	ICE_AQC_FW_LOG_ID_MDIO,
+	ICE_AQC_FW_LOG_ID_ADMINQ,
+	ICE_AQC_FW_LOG_ID_HDMA,
+	ICE_AQC_FW_LOG_ID_LLDP,
+	ICE_AQC_FW_LOG_ID_DCBX,
+	ICE_AQC_FW_LOG_ID_DCB,
+	ICE_AQC_FW_LOG_ID_NETPROXY,
+	ICE_AQC_FW_LOG_ID_NVM,
+	ICE_AQC_FW_LOG_ID_AUTH,
+	ICE_AQC_FW_LOG_ID_VPD,
+	ICE_AQC_FW_LOG_ID_IOSF,
+	ICE_AQC_FW_LOG_ID_PARSER,
+	ICE_AQC_FW_LOG_ID_SW,
+	ICE_AQC_FW_LOG_ID_SCHEDULER,
+	ICE_AQC_FW_LOG_ID_TXQ,
+	ICE_AQC_FW_LOG_ID_RSVD,
+	ICE_AQC_FW_LOG_ID_POST,
+	ICE_AQC_FW_LOG_ID_WATCHDOG,
+	ICE_AQC_FW_LOG_ID_TASK_DISPATCH,
+	ICE_AQC_FW_LOG_ID_MNG,
+	ICE_AQC_FW_LOG_ID_MAX,
+};
+
+/* Defines for both above FW logging command/response buffers */
+#define ICE_AQC_FW_LOG_ID_S		0
+#define ICE_AQC_FW_LOG_ID_M		(0xFFF << ICE_AQC_FW_LOG_ID_S)
+
+#define ICE_AQC_FW_LOG_CONF_SUCCESS	0	/* Used by response */
+#define ICE_AQC_FW_LOG_CONF_BAD_INDX	BIT(12)	/* Used by response */
+
+#define ICE_AQC_FW_LOG_EN_S		12
+#define ICE_AQC_FW_LOG_EN_M		(0xF << ICE_AQC_FW_LOG_EN_S)
+#define ICE_AQC_FW_LOG_INFO_EN		BIT(12)	/* Used by command */
+#define ICE_AQC_FW_LOG_INIT_EN		BIT(13)	/* Used by command */
+#define ICE_AQC_FW_LOG_FLOW_EN		BIT(14)	/* Used by command */
+#define ICE_AQC_FW_LOG_ERR_EN		BIT(15)	/* Used by command */
+
+/* Get/Clear FW Log (indirect 0xFF11) */
+struct ice_aqc_get_clear_fw_log {
+	u8 flags;
+#define ICE_AQC_FW_LOG_CLEAR		BIT(0)
+#define ICE_AQC_FW_LOG_MORE_DATA_AVAIL	BIT(1)
+	u8 rsvd1[7];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Download Package (indirect 0x0C40) */
+/* Also used for Update Package (indirect 0x0C42) */
+struct ice_aqc_download_pkg {
+	u8 flags;
+#define ICE_AQC_DOWNLOAD_PKG_LAST_BUF	0x01
+	u8 reserved[3];
+	__le32 reserved1;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_download_pkg_resp {
+	__le32 error_offset;
+	__le32 error_info;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Get Package Info List (indirect 0x0C43) */
+struct ice_aqc_get_pkg_info_list {
+	__le32 reserved1;
+	__le32 reserved2;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Version format for packages */
+struct ice_pkg_ver {
+	u8 major;
+	u8 minor;
+	u8 update;
+	u8 draft;
+};
+
+#define ICE_PKG_NAME_SIZE	32
+#define ICE_SEG_NAME_SIZE	28
+
+struct ice_aqc_get_pkg_info {
+	struct ice_pkg_ver ver;
+	char name[ICE_SEG_NAME_SIZE];
+	__le32 track_id;
+	u8 is_in_nvm;
+	u8 is_active;
+	u8 is_active_at_boot;
+	u8 is_modified;
+};
+
+/* Get Package Info List response buffer format (0x0C43) */
+struct ice_aqc_get_pkg_info_resp {
+	__le32 count;
+	struct ice_aqc_get_pkg_info pkg_info[];
+};
+
+/* Lan Queue Overflow Event (direct, 0x1001) */
+struct ice_aqc_event_lan_overflow {
+	__le32 prtdcb_ruptq;
+	__le32 qtx_ctl;
+	u8 reserved[8];
+};
+
+/**
+ * struct ice_aq_desc - Admin Queue (AQ) descriptor
+ * @flags: ICE_AQ_FLAG_* flags
+ * @opcode: AQ command opcode
+ * @datalen: length in bytes of indirect/external data buffer
+ * @retval: return value from firmware
+ * @cookie_high: opaque data high-half
+ * @cookie_low: opaque data low-half
+ * @params: command-specific parameters
+ *
+ * Descriptor format for commands the driver posts on the Admin Transmit Queue
+ * (ATQ). The firmware writes back onto the command descriptor and returns
+ * the result of the command. Asynchronous events that are not an immediate
+ * result of the command are written to the Admin Receive Queue (ARQ) using
+ * the same descriptor format. Descriptors are in little-endian notation with
+ * 32-bit words.
+ */
+struct ice_aq_desc {
+	__le16 flags;
+	__le16 opcode;
+	__le16 datalen;
+	__le16 retval;
+	__le32 cookie_high;
+	__le32 cookie_low;
+	union {
+		u8 raw[16];
+		struct ice_aqc_generic generic;
+		struct ice_aqc_get_ver get_ver;
+		struct ice_aqc_driver_ver driver_ver;
+		struct ice_aqc_q_shutdown q_shutdown;
+		struct ice_aqc_req_res res_owner;
+		struct ice_aqc_manage_mac_read mac_read;
+		struct ice_aqc_manage_mac_write mac_write;
+		struct ice_aqc_clear_pxe clear_pxe;
+		struct ice_aqc_list_caps get_cap;
+		struct ice_aqc_get_phy_caps get_phy;
+		struct ice_aqc_set_phy_cfg set_phy;
+		struct ice_aqc_restart_an restart_an;
+		struct ice_aqc_sff_eeprom read_write_sff_param;
+		struct ice_aqc_set_port_id_led set_port_id_led;
+		struct ice_aqc_get_sw_cfg get_sw_conf;
+		struct ice_aqc_sw_rules sw_rules;
+		struct ice_aqc_get_topo get_topo;
+		struct ice_aqc_sched_elem_cmd sched_elem_cmd;
+		struct ice_aqc_query_txsched_res query_sched_res;
+		struct ice_aqc_query_port_ets port_ets;
+		struct ice_aqc_rl_profile rl_profile;
+		struct ice_aqc_nvm nvm;
+		struct ice_aqc_nvm_checksum nvm_checksum;
+		struct ice_aqc_nvm_pkg_data pkg_data;
+		struct ice_aqc_nvm_pass_comp_tbl pass_comp_tbl;
+		struct ice_aqc_pf_vf_msg virt;
+		struct ice_aqc_lldp_get_mib lldp_get_mib;
+		struct ice_aqc_lldp_set_mib_change lldp_set_event;
+		struct ice_aqc_lldp_stop lldp_stop;
+		struct ice_aqc_lldp_start lldp_start;
+		struct ice_aqc_lldp_set_local_mib lldp_set_mib;
+		struct ice_aqc_lldp_stop_start_specific_agent lldp_agent_ctrl;
+		struct ice_aqc_get_set_rss_lut get_set_rss_lut;
+		struct ice_aqc_get_set_rss_key get_set_rss_key;
+		struct ice_aqc_add_txqs add_txqs;
+		struct ice_aqc_dis_txqs dis_txqs;
+		struct ice_aqc_add_get_update_free_vsi vsi_cmd;
+		struct ice_aqc_add_update_free_vsi_resp add_update_free_vsi_res;
+		struct ice_aqc_fw_logging fw_logging;
+		struct ice_aqc_get_clear_fw_log get_clear_fw_log;
+		struct ice_aqc_download_pkg download_pkg;
+		struct ice_aqc_set_mac_lb set_mac_lb;
+		struct ice_aqc_alloc_free_res_cmd sw_res_ctrl;
+		struct ice_aqc_set_mac_cfg set_mac_cfg;
+		struct ice_aqc_set_event_mask set_event_mask;
+		struct ice_aqc_get_link_status get_link_status;
+		struct ice_aqc_event_lan_overflow lan_overflow;
+		struct ice_aqc_get_link_topo get_link_topo;
+	} params;
+};
+
+/* FW defined boundary for a large buffer, 4k >= Large buffer > 512 bytes */
+#define ICE_AQ_LG_BUF	512
+
+#define ICE_AQ_FLAG_ERR_S	2
+#define ICE_AQ_FLAG_LB_S	9
+#define ICE_AQ_FLAG_RD_S	10
+#define ICE_AQ_FLAG_BUF_S	12
+#define ICE_AQ_FLAG_SI_S	13
+
+#define ICE_AQ_FLAG_ERR		BIT(ICE_AQ_FLAG_ERR_S) /* 0x4    */
+#define ICE_AQ_FLAG_LB		BIT(ICE_AQ_FLAG_LB_S)  /* 0x200  */
+#define ICE_AQ_FLAG_RD		BIT(ICE_AQ_FLAG_RD_S)  /* 0x400  */
+#define ICE_AQ_FLAG_BUF		BIT(ICE_AQ_FLAG_BUF_S) /* 0x1000 */
+#define ICE_AQ_FLAG_SI		BIT(ICE_AQ_FLAG_SI_S)  /* 0x2000 */
+
+/* error codes */
+enum ice_aq_err {
+	ICE_AQ_RC_OK		= 0,  /* Success */
+	ICE_AQ_RC_EPERM		= 1,  /* Operation not permitted */
+	ICE_AQ_RC_ENOENT	= 2,  /* No such element */
+	ICE_AQ_RC_ENOMEM	= 9,  /* Out of memory */
+	ICE_AQ_RC_EBUSY		= 12, /* Device or resource busy */
+	ICE_AQ_RC_EEXIST	= 13, /* Object already exists */
+	ICE_AQ_RC_EINVAL	= 14, /* Invalid argument */
+	ICE_AQ_RC_ENOSPC	= 16, /* No space left or allocation failure */
+	ICE_AQ_RC_ENOSYS	= 17, /* Function not implemented */
+	ICE_AQ_RC_EMODE		= 21, /* Op not allowed in current dev mode */
+	ICE_AQ_RC_ENOSEC	= 24, /* Missing security manifest */
+	ICE_AQ_RC_EBADSIG	= 25, /* Bad RSA signature */
+	ICE_AQ_RC_ESVN		= 26, /* SVN number prohibits this package */
+	ICE_AQ_RC_EBADMAN	= 27, /* Manifest hash mismatch */
+	ICE_AQ_RC_EBADBUF	= 28, /* Buffer hash mismatches manifest */
+};
+
+/* Admin Queue command opcodes */
+enum ice_adminq_opc {
+	/* AQ commands */
+	ice_aqc_opc_get_ver				= 0x0001,
+	ice_aqc_opc_driver_ver				= 0x0002,
+	ice_aqc_opc_q_shutdown				= 0x0003,
+
+	/* resource ownership */
+	ice_aqc_opc_req_res				= 0x0008,
+	ice_aqc_opc_release_res				= 0x0009,
+
+	/* device/function capabilities */
+	ice_aqc_opc_list_func_caps			= 0x000A,
+	ice_aqc_opc_list_dev_caps			= 0x000B,
+
+	/* manage MAC address */
+	ice_aqc_opc_manage_mac_read			= 0x0107,
+	ice_aqc_opc_manage_mac_write			= 0x0108,
+
+	/* PXE */
+	ice_aqc_opc_clear_pxe_mode			= 0x0110,
+
+	/* internal switch commands */
+	ice_aqc_opc_get_sw_cfg				= 0x0200,
+
+	/* Alloc/Free/Get Resources */
+	ice_aqc_opc_alloc_res				= 0x0208,
+	ice_aqc_opc_free_res				= 0x0209,
+
+	/* VSI commands */
+	ice_aqc_opc_add_vsi				= 0x0210,
+	ice_aqc_opc_update_vsi				= 0x0211,
+	ice_aqc_opc_free_vsi				= 0x0213,
+
+	/* switch rules population commands */
+	ice_aqc_opc_add_sw_rules			= 0x02A0,
+	ice_aqc_opc_update_sw_rules			= 0x02A1,
+	ice_aqc_opc_remove_sw_rules			= 0x02A2,
+
+	ice_aqc_opc_clear_pf_cfg			= 0x02A4,
+
+	/* transmit scheduler commands */
+	ice_aqc_opc_get_dflt_topo			= 0x0400,
+	ice_aqc_opc_add_sched_elems			= 0x0401,
+	ice_aqc_opc_cfg_sched_elems			= 0x0403,
+	ice_aqc_opc_get_sched_elems			= 0x0404,
+	ice_aqc_opc_suspend_sched_elems			= 0x0409,
+	ice_aqc_opc_resume_sched_elems			= 0x040A,
+	ice_aqc_opc_query_port_ets			= 0x040E,
+	ice_aqc_opc_delete_sched_elems			= 0x040F,
+	ice_aqc_opc_add_rl_profiles			= 0x0410,
+	ice_aqc_opc_query_sched_res			= 0x0412,
+	ice_aqc_opc_remove_rl_profiles			= 0x0415,
+
+	/* PHY commands */
+	ice_aqc_opc_get_phy_caps			= 0x0600,
+	ice_aqc_opc_set_phy_cfg				= 0x0601,
+	ice_aqc_opc_set_mac_cfg				= 0x0603,
+	ice_aqc_opc_restart_an				= 0x0605,
+	ice_aqc_opc_get_link_status			= 0x0607,
+	ice_aqc_opc_set_event_mask			= 0x0613,
+	ice_aqc_opc_set_mac_lb				= 0x0620,
+	ice_aqc_opc_get_link_topo			= 0x06E0,
+	ice_aqc_opc_set_port_id_led			= 0x06E9,
+	ice_aqc_opc_sff_eeprom				= 0x06EE,
+
+	/* NVM commands */
+	ice_aqc_opc_nvm_read				= 0x0701,
+	ice_aqc_opc_nvm_erase				= 0x0702,
+	ice_aqc_opc_nvm_write				= 0x0703,
+	ice_aqc_opc_nvm_checksum			= 0x0706,
+	ice_aqc_opc_nvm_write_activate			= 0x0707,
+	ice_aqc_opc_nvm_update_empr			= 0x0709,
+	ice_aqc_opc_nvm_pkg_data			= 0x070A,
+	ice_aqc_opc_nvm_pass_component_tbl		= 0x070B,
+
+	/* PF/VF mailbox commands */
+	ice_mbx_opc_send_msg_to_pf			= 0x0801,
+	ice_mbx_opc_send_msg_to_vf			= 0x0802,
+	/* LLDP commands */
+	ice_aqc_opc_lldp_get_mib			= 0x0A00,
+	ice_aqc_opc_lldp_set_mib_change			= 0x0A01,
+	ice_aqc_opc_lldp_stop				= 0x0A05,
+	ice_aqc_opc_lldp_start				= 0x0A06,
+	ice_aqc_opc_get_cee_dcb_cfg			= 0x0A07,
+	ice_aqc_opc_lldp_set_local_mib			= 0x0A08,
+	ice_aqc_opc_lldp_stop_start_specific_agent	= 0x0A09,
+
+	/* RSS commands */
+	ice_aqc_opc_set_rss_key				= 0x0B02,
+	ice_aqc_opc_set_rss_lut				= 0x0B03,
+	ice_aqc_opc_get_rss_key				= 0x0B04,
+	ice_aqc_opc_get_rss_lut				= 0x0B05,
+
+	/* Tx queue handling commands/events */
+	ice_aqc_opc_add_txqs				= 0x0C30,
+	ice_aqc_opc_dis_txqs				= 0x0C31,
+
+	/* package commands */
+	ice_aqc_opc_download_pkg			= 0x0C40,
+	ice_aqc_opc_update_pkg				= 0x0C42,
+	ice_aqc_opc_get_pkg_info_list			= 0x0C43,
+
+	/* Standalone Commands/Events */
+	ice_aqc_opc_event_lan_overflow			= 0x1001,
+
+	/* debug commands */
+	ice_aqc_opc_fw_logging				= 0xFF09,
+	ice_aqc_opc_fw_logging_info			= 0xFF10,
+};
+
+#endif /* _ICE_ADMINQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c
new file mode 100644
index 000000000..6560acd76
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_arfs.c
@@ -0,0 +1,663 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#include "ice.h"
+
+/**
+ * ice_is_arfs_active - helper to check is aRFS is active
+ * @vsi: VSI to check
+ */
+static bool ice_is_arfs_active(struct ice_vsi *vsi)
+{
+	return !!vsi->arfs_fltr_list;
+}
+
+/**
+ * ice_is_arfs_using_perfect_flow - check if aRFS has active perfect filters
+ * @hw: pointer to the HW structure
+ * @flow_type: flow type as Flow Director understands it
+ *
+ * Flow Director will query this function to see if aRFS is currently using
+ * the specified flow_type for perfect (4-tuple) filters.
+ */
+bool
+ice_is_arfs_using_perfect_flow(struct ice_hw *hw, enum ice_fltr_ptype flow_type)
+{
+	struct ice_arfs_active_fltr_cntrs *arfs_fltr_cntrs;
+	struct ice_pf *pf = hw->back;
+	struct ice_vsi *vsi;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return false;
+
+	arfs_fltr_cntrs = vsi->arfs_fltr_cntrs;
+
+	/* active counters can be updated by multiple CPUs */
+	smp_mb__before_atomic();
+	switch (flow_type) {
+	case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+		return atomic_read(&arfs_fltr_cntrs->active_udpv4_cnt) > 0;
+	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+		return atomic_read(&arfs_fltr_cntrs->active_udpv6_cnt) > 0;
+	case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+		return atomic_read(&arfs_fltr_cntrs->active_tcpv4_cnt) > 0;
+	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+		return atomic_read(&arfs_fltr_cntrs->active_tcpv6_cnt) > 0;
+	default:
+		return false;
+	}
+}
+
+/**
+ * ice_arfs_update_active_fltr_cntrs - update active filter counters for aRFS
+ * @vsi: VSI that aRFS is active on
+ * @entry: aRFS entry used to change counters
+ * @add: true to increment counter, false to decrement
+ */
+static void
+ice_arfs_update_active_fltr_cntrs(struct ice_vsi *vsi,
+				  struct ice_arfs_entry *entry, bool add)
+{
+	struct ice_arfs_active_fltr_cntrs *fltr_cntrs = vsi->arfs_fltr_cntrs;
+
+	switch (entry->fltr_info.flow_type) {
+	case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+		if (add)
+			atomic_inc(&fltr_cntrs->active_tcpv4_cnt);
+		else
+			atomic_dec(&fltr_cntrs->active_tcpv4_cnt);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+		if (add)
+			atomic_inc(&fltr_cntrs->active_tcpv6_cnt);
+		else
+			atomic_dec(&fltr_cntrs->active_tcpv6_cnt);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+		if (add)
+			atomic_inc(&fltr_cntrs->active_udpv4_cnt);
+		else
+			atomic_dec(&fltr_cntrs->active_udpv4_cnt);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+		if (add)
+			atomic_inc(&fltr_cntrs->active_udpv6_cnt);
+		else
+			atomic_dec(&fltr_cntrs->active_udpv6_cnt);
+		break;
+	default:
+		dev_err(ice_pf_to_dev(vsi->back), "aRFS: Failed to update filter counters, invalid filter type %d\n",
+			entry->fltr_info.flow_type);
+	}
+}
+
+/**
+ * ice_arfs_del_flow_rules - delete the rules passed in from HW
+ * @vsi: VSI for the flow rules that need to be deleted
+ * @del_list_head: head of the list of ice_arfs_entry(s) for rule deletion
+ *
+ * Loop through the delete list passed in and remove the rules from HW. After
+ * each rule is deleted, disconnect and free the ice_arfs_entry because it is no
+ * longer being referenced by the aRFS hash table.
+ */
+static void
+ice_arfs_del_flow_rules(struct ice_vsi *vsi, struct hlist_head *del_list_head)
+{
+	struct ice_arfs_entry *e;
+	struct hlist_node *n;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(vsi->back);
+
+	hlist_for_each_entry_safe(e, n, del_list_head, list_entry) {
+		int result;
+
+		result = ice_fdir_write_fltr(vsi->back, &e->fltr_info, false,
+					     false);
+		if (!result)
+			ice_arfs_update_active_fltr_cntrs(vsi, e, false);
+		else
+			dev_dbg(dev, "Unable to delete aRFS entry, err %d fltr_state %d fltr_id %d flow_id %d Q %d\n",
+				result, e->fltr_state, e->fltr_info.fltr_id,
+				e->flow_id, e->fltr_info.q_index);
+
+		/* The aRFS hash table is no longer referencing this entry */
+		hlist_del(&e->list_entry);
+		devm_kfree(dev, e);
+	}
+}
+
+/**
+ * ice_arfs_add_flow_rules - add the rules passed in from HW
+ * @vsi: VSI for the flow rules that need to be added
+ * @add_list_head: head of the list of ice_arfs_entry_ptr(s) for rule addition
+ *
+ * Loop through the add list passed in and remove the rules from HW. After each
+ * rule is added, disconnect and free the ice_arfs_entry_ptr node. Don't free
+ * the ice_arfs_entry(s) because they are still being referenced in the aRFS
+ * hash table.
+ */
+static void
+ice_arfs_add_flow_rules(struct ice_vsi *vsi, struct hlist_head *add_list_head)
+{
+	struct ice_arfs_entry_ptr *ep;
+	struct hlist_node *n;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(vsi->back);
+
+	hlist_for_each_entry_safe(ep, n, add_list_head, list_entry) {
+		int result;
+
+		result = ice_fdir_write_fltr(vsi->back,
+					     &ep->arfs_entry->fltr_info, true,
+					     false);
+		if (!result)
+			ice_arfs_update_active_fltr_cntrs(vsi, ep->arfs_entry,
+							  true);
+		else
+			dev_dbg(dev, "Unable to add aRFS entry, err %d fltr_state %d fltr_id %d flow_id %d Q %d\n",
+				result, ep->arfs_entry->fltr_state,
+				ep->arfs_entry->fltr_info.fltr_id,
+				ep->arfs_entry->flow_id,
+				ep->arfs_entry->fltr_info.q_index);
+
+		hlist_del(&ep->list_entry);
+		devm_kfree(dev, ep);
+	}
+}
+
+/**
+ * ice_arfs_is_flow_expired - check if the aRFS entry has expired
+ * @vsi: VSI containing the aRFS entry
+ * @arfs_entry: aRFS entry that's being checked for expiration
+ *
+ * Return true if the flow has expired, else false. This function should be used
+ * to determine whether or not an aRFS entry should be removed from the hardware
+ * and software structures.
+ */
+static bool
+ice_arfs_is_flow_expired(struct ice_vsi *vsi, struct ice_arfs_entry *arfs_entry)
+{
+#define ICE_ARFS_TIME_DELTA_EXPIRATION	msecs_to_jiffies(5000)
+	if (rps_may_expire_flow(vsi->netdev, arfs_entry->fltr_info.q_index,
+				arfs_entry->flow_id,
+				arfs_entry->fltr_info.fltr_id))
+		return true;
+
+	/* expiration timer only used for UDP filters */
+	if (arfs_entry->fltr_info.flow_type != ICE_FLTR_PTYPE_NONF_IPV4_UDP &&
+	    arfs_entry->fltr_info.flow_type != ICE_FLTR_PTYPE_NONF_IPV6_UDP)
+		return false;
+
+	return time_in_range64(arfs_entry->time_activated +
+			       ICE_ARFS_TIME_DELTA_EXPIRATION,
+			       arfs_entry->time_activated, get_jiffies_64());
+}
+
+/**
+ * ice_arfs_update_flow_rules - add/delete aRFS rules in HW
+ * @vsi: the VSI to be forwarded to
+ * @idx: index into the table of aRFS filter lists. Obtained from skb->hash
+ * @add_list: list to populate with filters to be added to Flow Director
+ * @del_list: list to populate with filters to be deleted from Flow Director
+ *
+ * Iterate over the hlist at the index given in the aRFS hash table and
+ * determine if there are any aRFS entries that need to be either added or
+ * deleted in the HW. If the aRFS entry is marked as ICE_ARFS_INACTIVE the
+ * filter needs to be added to HW, else if it's marked as ICE_ARFS_ACTIVE and
+ * the flow has expired delete the filter from HW. The caller of this function
+ * is expected to add/delete rules on the add_list/del_list respectively.
+ */
+static void
+ice_arfs_update_flow_rules(struct ice_vsi *vsi, u16 idx,
+			   struct hlist_head *add_list,
+			   struct hlist_head *del_list)
+{
+	struct ice_arfs_entry *e;
+	struct hlist_node *n;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(vsi->back);
+
+	/* go through the aRFS hlist at this idx and check for needed updates */
+	hlist_for_each_entry_safe(e, n, &vsi->arfs_fltr_list[idx], list_entry)
+		/* check if filter needs to be added to HW */
+		if (e->fltr_state == ICE_ARFS_INACTIVE) {
+			enum ice_fltr_ptype flow_type = e->fltr_info.flow_type;
+			struct ice_arfs_entry_ptr *ep =
+				devm_kzalloc(dev, sizeof(*ep), GFP_ATOMIC);
+
+			if (!ep)
+				continue;
+			INIT_HLIST_NODE(&ep->list_entry);
+			/* reference aRFS entry to add HW filter */
+			ep->arfs_entry = e;
+			hlist_add_head(&ep->list_entry, add_list);
+			e->fltr_state = ICE_ARFS_ACTIVE;
+			/* expiration timer only used for UDP flows */
+			if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+			    flow_type == ICE_FLTR_PTYPE_NONF_IPV6_UDP)
+				e->time_activated = get_jiffies_64();
+		} else if (e->fltr_state == ICE_ARFS_ACTIVE) {
+			/* check if filter needs to be removed from HW */
+			if (ice_arfs_is_flow_expired(vsi, e)) {
+				/* remove aRFS entry from hash table for delete
+				 * and to prevent referencing it the next time
+				 * through this hlist index
+				 */
+				hlist_del(&e->list_entry);
+				e->fltr_state = ICE_ARFS_TODEL;
+				/* save reference to aRFS entry for delete */
+				hlist_add_head(&e->list_entry, del_list);
+			}
+		}
+}
+
+/**
+ * ice_sync_arfs_fltrs - update all aRFS filters
+ * @pf: board private structure
+ */
+void ice_sync_arfs_fltrs(struct ice_pf *pf)
+{
+	HLIST_HEAD(tmp_del_list);
+	HLIST_HEAD(tmp_add_list);
+	struct ice_vsi *pf_vsi;
+	unsigned int i;
+
+	pf_vsi = ice_get_main_vsi(pf);
+	if (!pf_vsi)
+		return;
+
+	if (!ice_is_arfs_active(pf_vsi))
+		return;
+
+	spin_lock_bh(&pf_vsi->arfs_lock);
+	/* Once we process aRFS for the PF VSI get out */
+	for (i = 0; i < ICE_MAX_ARFS_LIST; i++)
+		ice_arfs_update_flow_rules(pf_vsi, i, &tmp_add_list,
+					   &tmp_del_list);
+	spin_unlock_bh(&pf_vsi->arfs_lock);
+
+	/* use list of ice_arfs_entry(s) for delete */
+	ice_arfs_del_flow_rules(pf_vsi, &tmp_del_list);
+
+	/* use list of ice_arfs_entry_ptr(s) for add */
+	ice_arfs_add_flow_rules(pf_vsi, &tmp_add_list);
+}
+
+/**
+ * ice_arfs_build_entry - builds an aRFS entry based on input
+ * @vsi: destination VSI for this flow
+ * @fk: flow dissector keys for creating the tuple
+ * @rxq_idx: Rx queue to steer this flow to
+ * @flow_id: passed down from the stack and saved for flow expiration
+ *
+ * returns an aRFS entry on success and NULL on failure
+ */
+static struct ice_arfs_entry *
+ice_arfs_build_entry(struct ice_vsi *vsi, const struct flow_keys *fk,
+		     u16 rxq_idx, u32 flow_id)
+{
+	struct ice_arfs_entry *arfs_entry;
+	struct ice_fdir_fltr *fltr_info;
+	u8 ip_proto;
+
+	arfs_entry = devm_kzalloc(ice_pf_to_dev(vsi->back),
+				  sizeof(*arfs_entry),
+				  GFP_ATOMIC | __GFP_NOWARN);
+	if (!arfs_entry)
+		return NULL;
+
+	fltr_info = &arfs_entry->fltr_info;
+	fltr_info->q_index = rxq_idx;
+	fltr_info->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX;
+	fltr_info->dest_vsi = vsi->idx;
+	ip_proto = fk->basic.ip_proto;
+
+	if (fk->basic.n_proto == htons(ETH_P_IP)) {
+		fltr_info->ip.v4.proto = ip_proto;
+		fltr_info->flow_type = (ip_proto == IPPROTO_TCP) ?
+			ICE_FLTR_PTYPE_NONF_IPV4_TCP :
+			ICE_FLTR_PTYPE_NONF_IPV4_UDP;
+		fltr_info->ip.v4.src_ip = fk->addrs.v4addrs.src;
+		fltr_info->ip.v4.dst_ip = fk->addrs.v4addrs.dst;
+		fltr_info->ip.v4.src_port = fk->ports.src;
+		fltr_info->ip.v4.dst_port = fk->ports.dst;
+	} else { /* ETH_P_IPV6 */
+		fltr_info->ip.v6.proto = ip_proto;
+		fltr_info->flow_type = (ip_proto == IPPROTO_TCP) ?
+			ICE_FLTR_PTYPE_NONF_IPV6_TCP :
+			ICE_FLTR_PTYPE_NONF_IPV6_UDP;
+		memcpy(&fltr_info->ip.v6.src_ip, &fk->addrs.v6addrs.src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr_info->ip.v6.dst_ip, &fk->addrs.v6addrs.dst,
+		       sizeof(struct in6_addr));
+		fltr_info->ip.v6.src_port = fk->ports.src;
+		fltr_info->ip.v6.dst_port = fk->ports.dst;
+	}
+
+	arfs_entry->flow_id = flow_id;
+	fltr_info->fltr_id =
+		atomic_inc_return(vsi->arfs_last_fltr_id) % RPS_NO_FILTER;
+
+	return arfs_entry;
+}
+
+/**
+ * ice_arfs_is_perfect_flow_set - Check to see if perfect flow is set
+ * @hw: pointer to HW structure
+ * @l3_proto: ETH_P_IP or ETH_P_IPV6 in network order
+ * @l4_proto: IPPROTO_UDP or IPPROTO_TCP
+ *
+ * We only support perfect (4-tuple) filters for aRFS. This function allows aRFS
+ * to check if perfect (4-tuple) flow rules are currently in place by Flow
+ * Director.
+ */
+static bool
+ice_arfs_is_perfect_flow_set(struct ice_hw *hw, __be16 l3_proto, u8 l4_proto)
+{
+	unsigned long *perfect_fltr = hw->fdir_perfect_fltr;
+
+	/* advanced Flow Director disabled, perfect filters always supported */
+	if (!perfect_fltr)
+		return true;
+
+	if (l3_proto == htons(ETH_P_IP) && l4_proto == IPPROTO_UDP)
+		return test_bit(ICE_FLTR_PTYPE_NONF_IPV4_UDP, perfect_fltr);
+	else if (l3_proto == htons(ETH_P_IP) && l4_proto == IPPROTO_TCP)
+		return test_bit(ICE_FLTR_PTYPE_NONF_IPV4_TCP, perfect_fltr);
+	else if (l3_proto == htons(ETH_P_IPV6) && l4_proto == IPPROTO_UDP)
+		return test_bit(ICE_FLTR_PTYPE_NONF_IPV6_UDP, perfect_fltr);
+	else if (l3_proto == htons(ETH_P_IPV6) && l4_proto == IPPROTO_TCP)
+		return test_bit(ICE_FLTR_PTYPE_NONF_IPV6_TCP, perfect_fltr);
+
+	return false;
+}
+
+/**
+ * ice_rx_flow_steer - steer the Rx flow to where application is being run
+ * @netdev: ptr to the netdev being adjusted
+ * @skb: buffer with required header information
+ * @rxq_idx: queue to which the flow needs to move
+ * @flow_id: flow identifier provided by the netdev
+ *
+ * Based on the skb, rxq_idx, and flow_id passed in add/update an entry in the
+ * aRFS hash table. Iterate over one of the hlists in the aRFS hash table and
+ * if the flow_id already exists in the hash table but the rxq_idx has changed
+ * mark the entry as ICE_ARFS_INACTIVE so it can get updated in HW, else
+ * if the entry is marked as ICE_ARFS_TODEL delete it from the aRFS hash table.
+ * If neither of the previous conditions are true then add a new entry in the
+ * aRFS hash table, which gets set to ICE_ARFS_INACTIVE by default so it can be
+ * added to HW.
+ */
+int
+ice_rx_flow_steer(struct net_device *netdev, const struct sk_buff *skb,
+		  u16 rxq_idx, u32 flow_id)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_arfs_entry *arfs_entry;
+	struct ice_vsi *vsi = np->vsi;
+	struct flow_keys fk;
+	struct ice_pf *pf;
+	__be16 n_proto;
+	u8 ip_proto;
+	u16 idx;
+	int ret;
+
+	/* failed to allocate memory for aRFS so don't crash */
+	if (unlikely(!vsi->arfs_fltr_list))
+		return -ENODEV;
+
+	pf = vsi->back;
+
+	if (skb->encapsulation)
+		return -EPROTONOSUPPORT;
+
+	if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
+		return -EPROTONOSUPPORT;
+
+	n_proto = fk.basic.n_proto;
+	/* Support only IPV4 and IPV6 */
+	if ((n_proto == htons(ETH_P_IP) && !ip_is_fragment(ip_hdr(skb))) ||
+	    n_proto == htons(ETH_P_IPV6))
+		ip_proto = fk.basic.ip_proto;
+	else
+		return -EPROTONOSUPPORT;
+
+	/* Support only TCP and UDP */
+	if (ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP)
+		return -EPROTONOSUPPORT;
+
+	/* only support 4-tuple filters for aRFS */
+	if (!ice_arfs_is_perfect_flow_set(&pf->hw, n_proto, ip_proto))
+		return -EOPNOTSUPP;
+
+	/* choose the aRFS list bucket based on skb hash */
+	idx = skb_get_hash_raw(skb) & ICE_ARFS_LST_MASK;
+	/* search for entry in the bucket */
+	spin_lock_bh(&vsi->arfs_lock);
+	hlist_for_each_entry(arfs_entry, &vsi->arfs_fltr_list[idx],
+			     list_entry) {
+		struct ice_fdir_fltr *fltr_info;
+
+		/* keep searching for the already existing arfs_entry flow */
+		if (arfs_entry->flow_id != flow_id)
+			continue;
+
+		fltr_info = &arfs_entry->fltr_info;
+		ret = fltr_info->fltr_id;
+
+		if (fltr_info->q_index == rxq_idx ||
+		    arfs_entry->fltr_state != ICE_ARFS_ACTIVE)
+			goto out;
+
+		/* update the queue to forward to on an already existing flow */
+		fltr_info->q_index = rxq_idx;
+		arfs_entry->fltr_state = ICE_ARFS_INACTIVE;
+		ice_arfs_update_active_fltr_cntrs(vsi, arfs_entry, false);
+		goto out_schedule_service_task;
+	}
+
+	arfs_entry = ice_arfs_build_entry(vsi, &fk, rxq_idx, flow_id);
+	if (!arfs_entry) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = arfs_entry->fltr_info.fltr_id;
+	INIT_HLIST_NODE(&arfs_entry->list_entry);
+	hlist_add_head(&arfs_entry->list_entry, &vsi->arfs_fltr_list[idx]);
+out_schedule_service_task:
+	ice_service_task_schedule(pf);
+out:
+	spin_unlock_bh(&vsi->arfs_lock);
+	return ret;
+}
+
+/**
+ * ice_init_arfs_cntrs - initialize aRFS counter values
+ * @vsi: VSI that aRFS counters need to be initialized on
+ */
+static int ice_init_arfs_cntrs(struct ice_vsi *vsi)
+{
+	if (!vsi || vsi->type != ICE_VSI_PF)
+		return -EINVAL;
+
+	vsi->arfs_fltr_cntrs = kzalloc(sizeof(*vsi->arfs_fltr_cntrs),
+				       GFP_KERNEL);
+	if (!vsi->arfs_fltr_cntrs)
+		return -ENOMEM;
+
+	vsi->arfs_last_fltr_id = kzalloc(sizeof(*vsi->arfs_last_fltr_id),
+					 GFP_KERNEL);
+	if (!vsi->arfs_last_fltr_id) {
+		kfree(vsi->arfs_fltr_cntrs);
+		vsi->arfs_fltr_cntrs = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_init_arfs - initialize aRFS resources
+ * @vsi: the VSI to be forwarded to
+ */
+void ice_init_arfs(struct ice_vsi *vsi)
+{
+	struct hlist_head *arfs_fltr_list;
+	unsigned int i;
+
+	if (!vsi || vsi->type != ICE_VSI_PF)
+		return;
+
+	arfs_fltr_list = kzalloc(sizeof(*arfs_fltr_list) * ICE_MAX_ARFS_LIST,
+				 GFP_KERNEL);
+	if (!arfs_fltr_list)
+		return;
+
+	if (ice_init_arfs_cntrs(vsi))
+		goto free_arfs_fltr_list;
+
+	for (i = 0; i < ICE_MAX_ARFS_LIST; i++)
+		INIT_HLIST_HEAD(&arfs_fltr_list[i]);
+
+	spin_lock_init(&vsi->arfs_lock);
+
+	vsi->arfs_fltr_list = arfs_fltr_list;
+
+	return;
+
+free_arfs_fltr_list:
+	kfree(arfs_fltr_list);
+}
+
+/**
+ * ice_clear_arfs - clear the aRFS hash table and any memory used for aRFS
+ * @vsi: the VSI to be forwarded to
+ */
+void ice_clear_arfs(struct ice_vsi *vsi)
+{
+	struct device *dev;
+	unsigned int i;
+
+	if (!vsi || vsi->type != ICE_VSI_PF || !vsi->back ||
+	    !vsi->arfs_fltr_list)
+		return;
+
+	dev = ice_pf_to_dev(vsi->back);
+	for (i = 0; i < ICE_MAX_ARFS_LIST; i++) {
+		struct ice_arfs_entry *r;
+		struct hlist_node *n;
+
+		spin_lock_bh(&vsi->arfs_lock);
+		hlist_for_each_entry_safe(r, n, &vsi->arfs_fltr_list[i],
+					  list_entry) {
+			hlist_del(&r->list_entry);
+			devm_kfree(dev, r);
+		}
+		spin_unlock_bh(&vsi->arfs_lock);
+	}
+
+	kfree(vsi->arfs_fltr_list);
+	vsi->arfs_fltr_list = NULL;
+	kfree(vsi->arfs_last_fltr_id);
+	vsi->arfs_last_fltr_id = NULL;
+	kfree(vsi->arfs_fltr_cntrs);
+	vsi->arfs_fltr_cntrs = NULL;
+}
+
+/**
+ * ice_free_cpu_rx_rmap - free setup CPU reverse map
+ * @vsi: the VSI to be forwarded to
+ */
+void ice_free_cpu_rx_rmap(struct ice_vsi *vsi)
+{
+	struct net_device *netdev;
+
+	if (!vsi || vsi->type != ICE_VSI_PF || !vsi->arfs_fltr_list)
+		return;
+
+	netdev = vsi->netdev;
+	if (!netdev || !netdev->rx_cpu_rmap ||
+	    netdev->reg_state != NETREG_REGISTERED)
+		return;
+
+	free_irq_cpu_rmap(netdev->rx_cpu_rmap);
+	netdev->rx_cpu_rmap = NULL;
+}
+
+/**
+ * ice_set_cpu_rx_rmap - setup CPU reverse map for each queue
+ * @vsi: the VSI to be forwarded to
+ */
+int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
+{
+	struct net_device *netdev;
+	struct ice_pf *pf;
+	int base_idx, i;
+
+	if (!vsi || vsi->type != ICE_VSI_PF)
+		return -EINVAL;
+
+	pf = vsi->back;
+	netdev = vsi->netdev;
+	if (!pf || !netdev || !vsi->num_q_vectors ||
+	    vsi->netdev->reg_state != NETREG_REGISTERED)
+		return -EINVAL;
+
+	netdev_dbg(netdev, "Setup CPU RMAP: vsi type 0x%x, ifname %s, q_vectors %d\n",
+		   vsi->type, netdev->name, vsi->num_q_vectors);
+
+	netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(vsi->num_q_vectors);
+	if (unlikely(!netdev->rx_cpu_rmap))
+		return -EINVAL;
+
+	base_idx = vsi->base_vector;
+	for (i = 0; i < vsi->num_q_vectors; i++)
+		if (irq_cpu_rmap_add(netdev->rx_cpu_rmap,
+				     pf->msix_entries[base_idx + i].vector)) {
+			ice_free_cpu_rx_rmap(vsi);
+			return -EINVAL;
+		}
+
+	return 0;
+}
+
+/**
+ * ice_remove_arfs - remove/clear all aRFS resources
+ * @pf: device private structure
+ */
+void ice_remove_arfs(struct ice_pf *pf)
+{
+	struct ice_vsi *pf_vsi;
+
+	pf_vsi = ice_get_main_vsi(pf);
+	if (!pf_vsi)
+		return;
+
+	ice_free_cpu_rx_rmap(pf_vsi);
+	ice_clear_arfs(pf_vsi);
+}
+
+/**
+ * ice_rebuild_arfs - remove/clear all aRFS resources and rebuild after reset
+ * @pf: device private structure
+ */
+void ice_rebuild_arfs(struct ice_pf *pf)
+{
+	struct ice_vsi *pf_vsi;
+
+	pf_vsi = ice_get_main_vsi(pf);
+	if (!pf_vsi)
+		return;
+
+	ice_remove_arfs(pf);
+	if (ice_set_cpu_rx_rmap(pf_vsi)) {
+		dev_err(ice_pf_to_dev(pf), "Failed to rebuild aRFS\n");
+		return;
+	}
+	ice_init_arfs(pf_vsi);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.h b/drivers/net/ethernet/intel/ice/ice_arfs.h
new file mode 100644
index 000000000..f39cd1640
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_arfs.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#ifndef _ICE_ARFS_H_
+#define _ICE_ARFS_H_
+enum ice_arfs_fltr_state {
+	ICE_ARFS_INACTIVE,
+	ICE_ARFS_ACTIVE,
+	ICE_ARFS_TODEL,
+};
+
+struct ice_arfs_entry {
+	struct ice_fdir_fltr fltr_info;
+	struct hlist_node list_entry;
+	u64 time_activated;	/* only valid for UDP flows */
+	u32 flow_id;
+	/* fltr_state = 0 - ICE_ARFS_INACTIVE:
+	 *	filter needs to be updated or programmed in HW.
+	 * fltr_state = 1 - ICE_ARFS_ACTIVE:
+	 *	filter is active and programmed in HW.
+	 * fltr_state = 2 - ICE_ARFS_TODEL:
+	 *	filter has been deleted from HW and needs to be removed from
+	 *	the aRFS hash table.
+	 */
+	u8 fltr_state;
+};
+
+struct ice_arfs_entry_ptr {
+	struct ice_arfs_entry *arfs_entry;
+	struct hlist_node list_entry;
+};
+
+struct ice_arfs_active_fltr_cntrs {
+	atomic_t active_tcpv4_cnt;
+	atomic_t active_tcpv6_cnt;
+	atomic_t active_udpv4_cnt;
+	atomic_t active_udpv6_cnt;
+};
+
+#ifdef CONFIG_RFS_ACCEL
+int
+ice_rx_flow_steer(struct net_device *netdev, const struct sk_buff *skb,
+		  u16 rxq_idx, u32 flow_id);
+void ice_clear_arfs(struct ice_vsi *vsi);
+void ice_free_cpu_rx_rmap(struct ice_vsi *vsi);
+void ice_init_arfs(struct ice_vsi *vsi);
+void ice_sync_arfs_fltrs(struct ice_pf *pf);
+int ice_set_cpu_rx_rmap(struct ice_vsi *vsi);
+void ice_remove_arfs(struct ice_pf *pf);
+void ice_rebuild_arfs(struct ice_pf *pf);
+bool
+ice_is_arfs_using_perfect_flow(struct ice_hw *hw,
+			       enum ice_fltr_ptype flow_type);
+#else
+#define ice_sync_arfs_fltrs(pf) do {} while (0)
+#define ice_init_arfs(vsi) do {} while (0)
+#define ice_clear_arfs(vsi) do {} while (0)
+#define ice_remove_arfs(pf) do {} while (0)
+#define ice_free_cpu_rx_rmap(vsi) do {} while (0)
+#define ice_rebuild_arfs(pf) do {} while (0)
+
+static inline int ice_set_cpu_rx_rmap(struct ice_vsi __always_unused *vsi)
+{
+	return 0;
+}
+
+static inline int
+ice_rx_flow_steer(struct net_device __always_unused *netdev,
+		  const struct sk_buff __always_unused *skb,
+		  u16 __always_unused rxq_idx, u32 __always_unused flow_id)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline bool
+ice_is_arfs_using_perfect_flow(struct ice_hw __always_unused *hw,
+			       enum ice_fltr_ptype __always_unused flow_type)
+{
+	return false;
+}
+#endif /* CONFIG_RFS_ACCEL */
+#endif /* _ICE_ARFS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
new file mode 100644
index 000000000..59df4c9bd
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -0,0 +1,876 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include <net/xdp_sock_drv.h>
+#include "ice_base.h"
+#include "ice_lib.h"
+#include "ice_dcb_lib.h"
+
+/**
+ * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
+ * @qs_cfg: gathered variables needed for PF->VSI queues assignment
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
+{
+	unsigned int offset, i;
+
+	mutex_lock(qs_cfg->qs_mutex);
+	offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size,
+					    0, qs_cfg->q_count, 0);
+	if (offset >= qs_cfg->pf_map_size) {
+		mutex_unlock(qs_cfg->qs_mutex);
+		return -ENOMEM;
+	}
+
+	bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count);
+	for (i = 0; i < qs_cfg->q_count; i++)
+		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = (u16)(i + offset);
+	mutex_unlock(qs_cfg->qs_mutex);
+
+	return 0;
+}
+
+/**
+ * __ice_vsi_get_qs_sc - Assign a scattered queues from PF to VSI
+ * @qs_cfg: gathered variables needed for pf->vsi queues assignment
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
+{
+	unsigned int i, index = 0;
+
+	mutex_lock(qs_cfg->qs_mutex);
+	for (i = 0; i < qs_cfg->q_count; i++) {
+		index = find_next_zero_bit(qs_cfg->pf_map,
+					   qs_cfg->pf_map_size, index);
+		if (index >= qs_cfg->pf_map_size)
+			goto err_scatter;
+		set_bit(index, qs_cfg->pf_map);
+		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = (u16)index;
+	}
+	mutex_unlock(qs_cfg->qs_mutex);
+
+	return 0;
+err_scatter:
+	for (index = 0; index < i; index++) {
+		clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map);
+		qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0;
+	}
+	mutex_unlock(qs_cfg->qs_mutex);
+
+	return -ENOMEM;
+}
+
+/**
+ * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
+ * @pf: the PF being configured
+ * @pf_q: the PF queue
+ * @ena: enable or disable state of the queue
+ *
+ * This routine will wait for the given Rx queue of the PF to reach the
+ * enabled or disabled state.
+ * Returns -ETIMEDOUT in case of failing to reach the requested state after
+ * multiple retries; else will return 0 in case of success.
+ */
+static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
+{
+	int i;
+
+	for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) {
+		if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) &
+			      QRX_CTRL_QENA_STAT_M))
+			return 0;
+
+		usleep_range(20, 40);
+	}
+
+	return -ETIMEDOUT;
+}
+
+/**
+ * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @vsi: the VSI being configured
+ * @v_idx: index of the vector in the VSI struct
+ *
+ * We allocate one q_vector and set default value for ITR setting associated
+ * with this q_vector. If allocation fails we return -ENOMEM.
+ */
+static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_q_vector *q_vector;
+
+	/* allocate q_vector */
+	q_vector = devm_kzalloc(ice_pf_to_dev(pf), sizeof(*q_vector),
+				GFP_KERNEL);
+	if (!q_vector)
+		return -ENOMEM;
+
+	q_vector->vsi = vsi;
+	q_vector->v_idx = v_idx;
+	q_vector->tx.itr_setting = ICE_DFLT_TX_ITR;
+	q_vector->rx.itr_setting = ICE_DFLT_RX_ITR;
+	if (vsi->type == ICE_VSI_VF)
+		goto out;
+	/* only set affinity_mask if the CPU is online */
+	if (cpu_online(v_idx))
+		cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
+
+	/* This will not be called in the driver load path because the netdev
+	 * will not be created yet. All other cases with register the NAPI
+	 * handler here (i.e. resume, reset/rebuild, etc.)
+	 */
+	if (vsi->netdev)
+		netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll,
+			       NAPI_POLL_WEIGHT);
+
+out:
+	/* tie q_vector and VSI together */
+	vsi->q_vectors[v_idx] = q_vector;
+
+	return 0;
+}
+
+/**
+ * ice_free_q_vector - Free memory allocated for a specific interrupt vector
+ * @vsi: VSI having the memory freed
+ * @v_idx: index of the vector to be freed
+ */
+static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+	struct ice_q_vector *q_vector;
+	struct ice_pf *pf = vsi->back;
+	struct ice_ring *ring;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+	if (!vsi->q_vectors[v_idx]) {
+		dev_dbg(dev, "Queue vector at index %d not found\n", v_idx);
+		return;
+	}
+	q_vector = vsi->q_vectors[v_idx];
+
+	ice_for_each_ring(ring, q_vector->tx)
+		ring->q_vector = NULL;
+	ice_for_each_ring(ring, q_vector->rx)
+		ring->q_vector = NULL;
+
+	/* only VSI with an associated netdev is set up with NAPI */
+	if (vsi->netdev)
+		netif_napi_del(&q_vector->napi);
+
+	devm_kfree(dev, q_vector);
+	vsi->q_vectors[v_idx] = NULL;
+}
+
+/**
+ * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set
+ * @hw: board specific structure
+ */
+static void ice_cfg_itr_gran(struct ice_hw *hw)
+{
+	u32 regval = rd32(hw, GLINT_CTL);
+
+	/* no need to update global register if ITR gran is already set */
+	if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_200_M) >>
+	     GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_100_M) >>
+	     GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_50_M) >>
+	     GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_25_M) >>
+	      GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US))
+		return;
+
+	regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) &
+		  GLINT_CTL_ITR_GRAN_200_M) |
+		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) &
+		  GLINT_CTL_ITR_GRAN_100_M) |
+		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) &
+		  GLINT_CTL_ITR_GRAN_50_M) |
+		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) &
+		  GLINT_CTL_ITR_GRAN_25_M);
+	wr32(hw, GLINT_CTL, regval);
+}
+
+/**
+ * ice_calc_q_handle - calculate the queue handle
+ * @vsi: VSI that ring belongs to
+ * @ring: ring to get the absolute queue index
+ * @tc: traffic class number
+ */
+static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc)
+{
+	WARN_ONCE(ice_ring_is_xdp(ring) && tc, "XDP ring can't belong to TC other than 0\n");
+
+	/* Idea here for calculation is that we subtract the number of queue
+	 * count from TC that ring belongs to from it's absolute queue index
+	 * and as a result we get the queue's index within TC.
+	 */
+	return ring->q_index - vsi->tc_cfg.tc_info[tc].qoffset;
+}
+
+/**
+ * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
+ * @ring: The Tx ring to configure
+ * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
+ * @pf_q: queue index in the PF space
+ *
+ * Configure the Tx descriptor ring in TLAN context.
+ */
+static void
+ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
+{
+	struct ice_vsi *vsi = ring->vsi;
+	struct ice_hw *hw = &vsi->back->hw;
+
+	tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
+
+	tlan_ctx->port_num = vsi->port_info->lport;
+
+	/* Transmit Queue Length */
+	tlan_ctx->qlen = ring->count;
+
+	ice_set_cgd_num(tlan_ctx, ring);
+
+	/* PF number */
+	tlan_ctx->pf_num = hw->pf_id;
+
+	/* queue belongs to a specific VSI type
+	 * VF / VM index should be programmed per vmvf_type setting:
+	 * for vmvf_type = VF, it is VF number between 0-256
+	 * for vmvf_type = VM, it is VM number between 0-767
+	 * for PF or EMP this field should be set to zero
+	 */
+	switch (vsi->type) {
+	case ICE_VSI_LB:
+	case ICE_VSI_CTRL:
+	case ICE_VSI_PF:
+		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
+		break;
+	case ICE_VSI_VF:
+		/* Firmware expects vmvf_num to be absolute VF ID */
+		tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id;
+		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF;
+		break;
+	default:
+		return;
+	}
+
+	/* make sure the context is associated with the right VSI */
+	tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
+
+	tlan_ctx->tso_ena = ICE_TX_LEGACY;
+	tlan_ctx->tso_qnum = pf_q;
+
+	/* Legacy or Advanced Host Interface:
+	 * 0: Advanced Host Interface
+	 * 1: Legacy Host Interface
+	 */
+	tlan_ctx->legacy_int = ICE_TX_LEGACY;
+}
+
+/**
+ * ice_setup_rx_ctx - Configure a receive ring context
+ * @ring: The Rx ring to configure
+ *
+ * Configure the Rx descriptor ring in RLAN context.
+ */
+int ice_setup_rx_ctx(struct ice_ring *ring)
+{
+	struct device *dev = ice_pf_to_dev(ring->vsi->back);
+	int chain_len = ICE_MAX_CHAINED_RX_BUFS;
+	u16 num_bufs = ICE_DESC_UNUSED(ring);
+	struct ice_vsi *vsi = ring->vsi;
+	u32 rxdid = ICE_RXDID_FLEX_NIC;
+	struct ice_rlan_ctx rlan_ctx;
+	struct ice_hw *hw;
+	u16 pf_q;
+	int err;
+
+	hw = &vsi->back->hw;
+
+	/* what is Rx queue number in global space of 2K Rx queues */
+	pf_q = vsi->rxq_map[ring->q_index];
+
+	/* clear the context structure first */
+	memset(&rlan_ctx, 0, sizeof(rlan_ctx));
+
+	ring->rx_buf_len = vsi->rx_buf_len;
+
+	if (ring->vsi->type == ICE_VSI_PF) {
+		if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+			/* coverity[check_return] */
+			xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+					 ring->q_index);
+
+		ring->xsk_pool = ice_xsk_pool(ring);
+		if (ring->xsk_pool) {
+			xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+
+			ring->rx_buf_len =
+				xsk_pool_get_rx_frame_size(ring->xsk_pool);
+			/* For AF_XDP ZC, we disallow packets to span on
+			 * multiple buffers, thus letting us skip that
+			 * handling in the fast-path.
+			 */
+			chain_len = 1;
+			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+							 MEM_TYPE_XSK_BUFF_POOL,
+							 NULL);
+			if (err)
+				return err;
+			xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
+
+			dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
+				 ring->q_index);
+		} else {
+			if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+				/* coverity[check_return] */
+				xdp_rxq_info_reg(&ring->xdp_rxq,
+						 ring->netdev,
+						 ring->q_index);
+
+			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+							 MEM_TYPE_PAGE_SHARED,
+							 NULL);
+			if (err)
+				return err;
+		}
+	}
+	/* Receive Queue Base Address.
+	 * Indicates the starting address of the descriptor queue defined in
+	 * 128 Byte units.
+	 */
+	rlan_ctx.base = ring->dma >> 7;
+
+	rlan_ctx.qlen = ring->count;
+
+	/* Receive Packet Data Buffer Size.
+	 * The Packet Data Buffer Size is defined in 128 byte units.
+	 */
+	rlan_ctx.dbuf = DIV_ROUND_UP(ring->rx_buf_len,
+				     BIT_ULL(ICE_RLAN_CTX_DBUF_S));
+
+	/* use 32 byte descriptors */
+	rlan_ctx.dsize = 1;
+
+	/* Strip the Ethernet CRC bytes before the packet is posted to host
+	 * memory.
+	 */
+	rlan_ctx.crcstrip = 1;
+
+	/* L2TSEL flag defines the reported L2 Tags in the receive descriptor */
+	rlan_ctx.l2tsel = 1;
+
+	rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
+	rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
+	rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
+
+	/* This controls whether VLAN is stripped from inner headers
+	 * The VLAN in the inner L2 header is stripped to the receive
+	 * descriptor if enabled by this flag.
+	 */
+	rlan_ctx.showiv = 0;
+
+	/* Max packet size for this queue - must not be set to a larger value
+	 * than 5 x DBUF
+	 */
+	rlan_ctx.rxmax = min_t(u32, vsi->max_frame,
+			       chain_len * ring->rx_buf_len);
+
+	/* Rx queue threshold in units of 64 */
+	rlan_ctx.lrxqthresh = 1;
+
+	/* Enable Flexible Descriptors in the queue context which
+	 * allows this driver to select a specific receive descriptor format
+	 * increasing context priority to pick up profile ID; default is 0x01;
+	 * setting to 0x03 to ensure profile is programming if prev context is
+	 * of same priority
+	 */
+	if (vsi->type != ICE_VSI_VF)
+		ice_write_qrxflxp_cntxt(hw, pf_q, rxdid, 0x3);
+	else
+		ice_write_qrxflxp_cntxt(hw, pf_q, ICE_RXDID_LEGACY_1, 0x3);
+
+	/* Absolute queue number out of 2K needs to be passed */
+	err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
+	if (err) {
+		dev_err(dev, "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
+			pf_q, err);
+		return -EIO;
+	}
+
+	if (vsi->type == ICE_VSI_VF)
+		return 0;
+
+	/* configure Rx buffer alignment */
+	if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+		ice_clear_ring_build_skb_ena(ring);
+	else
+		ice_set_ring_build_skb_ena(ring);
+
+	/* init queue specific tail register */
+	ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
+	writel(0, ring->tail);
+
+	if (ring->xsk_pool) {
+		if (!xsk_buff_can_alloc(ring->xsk_pool, num_bufs)) {
+			dev_warn(dev, "XSK buffer pool does not provide enough addresses to fill %d buffers on Rx ring %d\n",
+				 num_bufs, ring->q_index);
+			dev_warn(dev, "Change Rx ring/fill queue size to avoid performance issues\n");
+
+			return 0;
+		}
+
+		err = ice_alloc_rx_bufs_zc(ring, num_bufs);
+		if (err)
+			dev_info(dev, "Failed to allocate some buffers on XSK buffer pool enabled Rx ring %d (pf_q %d)\n",
+				 ring->q_index, pf_q);
+		return 0;
+	}
+
+	ice_alloc_rx_bufs(ring, num_bufs);
+
+	return 0;
+}
+
+/**
+ * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI
+ * @qs_cfg: gathered variables needed for pf->vsi queues assignment
+ *
+ * This function first tries to find contiguous space. If it is not successful,
+ * it tries with the scatter approach.
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg)
+{
+	int ret = 0;
+
+	ret = __ice_vsi_get_qs_contig(qs_cfg);
+	if (ret) {
+		/* contig failed, so try with scatter approach */
+		qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER;
+		qs_cfg->q_count = min_t(unsigned int, qs_cfg->q_count,
+					qs_cfg->scatter_count);
+		ret = __ice_vsi_get_qs_sc(qs_cfg);
+	}
+	return ret;
+}
+
+/**
+ * ice_vsi_ctrl_one_rx_ring - start/stop VSI's Rx ring with no busy wait
+ * @vsi: the VSI being configured
+ * @ena: start or stop the Rx ring
+ * @rxq_idx: 0-based Rx queue index for the VSI passed in
+ * @wait: wait or don't wait for configuration to finish in hardware
+ *
+ * Return 0 on success and negative on error.
+ */
+int
+ice_vsi_ctrl_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx, bool wait)
+{
+	int pf_q = vsi->rxq_map[rxq_idx];
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 rx_reg;
+
+	rx_reg = rd32(hw, QRX_CTRL(pf_q));
+
+	/* Skip if the queue is already in the requested state */
+	if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
+		return 0;
+
+	/* turn on/off the queue */
+	if (ena)
+		rx_reg |= QRX_CTRL_QENA_REQ_M;
+	else
+		rx_reg &= ~QRX_CTRL_QENA_REQ_M;
+	wr32(hw, QRX_CTRL(pf_q), rx_reg);
+
+	if (!wait)
+		return 0;
+
+	ice_flush(hw);
+	return ice_pf_rxq_wait(pf, pf_q, ena);
+}
+
+/**
+ * ice_vsi_wait_one_rx_ring - wait for a VSI's Rx ring to be stopped/started
+ * @vsi: the VSI being configured
+ * @ena: true/false to verify Rx ring has been enabled/disabled respectively
+ * @rxq_idx: 0-based Rx queue index for the VSI passed in
+ *
+ * This routine will wait for the given Rx queue of the VSI to reach the
+ * enabled or disabled state. Returns -ETIMEDOUT in case of failing to reach
+ * the requested state after multiple retries; else will return 0 in case of
+ * success.
+ */
+int ice_vsi_wait_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
+{
+	int pf_q = vsi->rxq_map[rxq_idx];
+	struct ice_pf *pf = vsi->back;
+
+	return ice_pf_rxq_wait(pf, pf_q, ena);
+}
+
+/**
+ * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ */
+int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
+{
+	struct device *dev = ice_pf_to_dev(vsi->back);
+	u16 v_idx;
+	int err;
+
+	if (vsi->q_vectors[0]) {
+		dev_dbg(dev, "VSI %d has existing q_vectors\n", vsi->vsi_num);
+		return -EEXIST;
+	}
+
+	for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++) {
+		err = ice_vsi_alloc_q_vector(vsi, v_idx);
+		if (err)
+			goto err_out;
+	}
+
+	return 0;
+
+err_out:
+	while (v_idx--)
+		ice_free_q_vector(vsi, v_idx);
+
+	dev_err(dev, "Failed to allocate %d q_vector for VSI %d, ret=%d\n",
+		vsi->num_q_vectors, vsi->vsi_num, err);
+	vsi->num_q_vectors = 0;
+	return err;
+}
+
+/**
+ * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * This function maps descriptor rings to the queue-specific vectors allotted
+ * through the MSI-X enabling code. On a constrained vector budget, we map Tx
+ * and Rx rings to the vector as "efficiently" as possible.
+ */
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
+{
+	int q_vectors = vsi->num_q_vectors;
+	u16 tx_rings_rem, rx_rings_rem;
+	int v_id;
+
+	/* initially assigning remaining rings count to VSIs num queue value */
+	tx_rings_rem = vsi->num_txq;
+	rx_rings_rem = vsi->num_rxq;
+
+	for (v_id = 0; v_id < q_vectors; v_id++) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
+		u8 tx_rings_per_v, rx_rings_per_v;
+		u16 q_id, q_base;
+
+		/* Tx rings mapping to vector */
+		tx_rings_per_v = (u8)DIV_ROUND_UP(tx_rings_rem,
+						  q_vectors - v_id);
+		q_vector->num_ring_tx = tx_rings_per_v;
+		q_vector->tx.ring = NULL;
+		q_vector->tx.itr_idx = ICE_TX_ITR;
+		q_base = vsi->num_txq - tx_rings_rem;
+
+		for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
+			struct ice_ring *tx_ring = vsi->tx_rings[q_id];
+
+			tx_ring->q_vector = q_vector;
+			tx_ring->next = q_vector->tx.ring;
+			q_vector->tx.ring = tx_ring;
+		}
+		tx_rings_rem -= tx_rings_per_v;
+
+		/* Rx rings mapping to vector */
+		rx_rings_per_v = (u8)DIV_ROUND_UP(rx_rings_rem,
+						  q_vectors - v_id);
+		q_vector->num_ring_rx = rx_rings_per_v;
+		q_vector->rx.ring = NULL;
+		q_vector->rx.itr_idx = ICE_RX_ITR;
+		q_base = vsi->num_rxq - rx_rings_rem;
+
+		for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
+			struct ice_ring *rx_ring = vsi->rx_rings[q_id];
+
+			rx_ring->q_vector = q_vector;
+			rx_ring->next = q_vector->rx.ring;
+			q_vector->rx.ring = rx_ring;
+		}
+		rx_rings_rem -= rx_rings_per_v;
+	}
+}
+
+/**
+ * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
+ * @vsi: the VSI having memory freed
+ */
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
+{
+	int v_idx;
+
+	ice_for_each_q_vector(vsi, v_idx)
+		ice_free_q_vector(vsi, v_idx);
+}
+
+/**
+ * ice_vsi_cfg_txq - Configure single Tx queue
+ * @vsi: the VSI that queue belongs to
+ * @ring: Tx ring to be configured
+ * @qg_buf: queue group buffer
+ */
+int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
+		struct ice_aqc_add_tx_qgrp *qg_buf)
+{
+	u8 buf_len = struct_size(qg_buf, txqs, 1);
+	struct ice_tlan_ctx tlan_ctx = { 0 };
+	struct ice_aqc_add_txqs_perq *txq;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	u16 pf_q;
+	u8 tc;
+
+	pf_q = ring->reg_idx;
+	ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
+	/* copy context contents into the qg_buf */
+	qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
+	ice_set_ctx(hw, (u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
+		    ice_tlan_ctx_info);
+
+	/* init queue specific tail reg. It is referred as
+	 * transmit comm scheduler queue doorbell.
+	 */
+	ring->tail = hw->hw_addr + QTX_COMM_DBELL(pf_q);
+
+	if (IS_ENABLED(CONFIG_DCB))
+		tc = ring->dcb_tc;
+	else
+		tc = 0;
+
+	/* Add unique software queue handle of the Tx queue per
+	 * TC into the VSI Tx ring
+	 */
+	ring->q_handle = ice_calc_q_handle(vsi, ring, tc);
+
+	status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
+				 1, qg_buf, buf_len, NULL);
+	if (status) {
+		dev_err(ice_pf_to_dev(pf), "Failed to set LAN Tx queue context, error: %s\n",
+			ice_stat_str(status));
+		return -ENODEV;
+	}
+
+	/* Add Tx Queue TEID into the VSI Tx ring from the
+	 * response. This will complete configuring and
+	 * enabling the queue.
+	 */
+	txq = &qg_buf->txqs[0];
+	if (pf_q == le16_to_cpu(txq->txq_id))
+		ring->txq_teid = le32_to_cpu(txq->q_teid);
+
+	return 0;
+}
+
+/**
+ * ice_cfg_itr - configure the initial interrupt throttle values
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector that's being configured
+ *
+ * Configure interrupt throttling values for the ring containers that are
+ * associated with the interrupt vector passed in.
+ */
+void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+	ice_cfg_itr_gran(hw);
+
+	if (q_vector->num_ring_rx) {
+		struct ice_ring_container *rc = &q_vector->rx;
+
+		rc->target_itr = ITR_TO_REG(rc->itr_setting);
+		rc->next_update = jiffies + 1;
+		rc->current_itr = rc->target_itr;
+		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+		     ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
+	}
+
+	if (q_vector->num_ring_tx) {
+		struct ice_ring_container *rc = &q_vector->tx;
+
+		rc->target_itr = ITR_TO_REG(rc->itr_setting);
+		rc->next_update = jiffies + 1;
+		rc->current_itr = rc->target_itr;
+		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+		     ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
+	}
+}
+
+/**
+ * ice_cfg_txq_interrupt - configure interrupt on Tx queue
+ * @vsi: the VSI being configured
+ * @txq: Tx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector
+ * within the function space.
+ */
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+
+	itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M;
+
+	val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
+	      ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
+
+	wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+		wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]),
+		     val);
+	}
+	ice_flush(hw);
+}
+
+/**
+ * ice_cfg_rxq_interrupt - configure interrupt on Rx queue
+ * @vsi: the VSI being configured
+ * @rxq: Rx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector
+ * within the function space.
+ */
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+
+	itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M;
+
+	val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
+	      ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M);
+
+	wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_trigger_sw_intr - trigger a software interrupt
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector to trigger the software interrupt for
+ */
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+	wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx),
+	     (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) |
+	     GLINT_DYN_CTL_SWINT_TRIG_M |
+	     GLINT_DYN_CTL_INTENA_M);
+}
+
+/**
+ * ice_vsi_stop_tx_ring - Disable single Tx ring
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ * @ring: Tx ring to be stopped
+ * @txq_meta: Meta data of Tx ring to be stopped
+ */
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+		     u16 rel_vmvf_num, struct ice_ring *ring,
+		     struct ice_txq_meta *txq_meta)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_q_vector *q_vector;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	u32 val;
+
+	/* clear cause_ena bit for disabled queues */
+	val = rd32(hw, QINT_TQCTL(ring->reg_idx));
+	val &= ~QINT_TQCTL_CAUSE_ENA_M;
+	wr32(hw, QINT_TQCTL(ring->reg_idx), val);
+
+	/* software is expected to wait for 100 ns */
+	ndelay(100);
+
+	/* trigger a software interrupt for the vector
+	 * associated to the queue to schedule NAPI handler
+	 */
+	q_vector = ring->q_vector;
+	if (q_vector)
+		ice_trigger_sw_intr(hw, q_vector);
+
+	status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx,
+				 txq_meta->tc, 1, &txq_meta->q_handle,
+				 &txq_meta->q_id, &txq_meta->q_teid, rst_src,
+				 rel_vmvf_num, NULL);
+
+	/* if the disable queue command was exercised during an
+	 * active reset flow, ICE_ERR_RESET_ONGOING is returned.
+	 * This is not an error as the reset operation disables
+	 * queues at the hardware level anyway.
+	 */
+	if (status == ICE_ERR_RESET_ONGOING) {
+		dev_dbg(ice_pf_to_dev(vsi->back), "Reset in progress. LAN Tx queues already disabled\n");
+	} else if (status == ICE_ERR_DOES_NOT_EXIST) {
+		dev_dbg(ice_pf_to_dev(vsi->back), "LAN Tx queues do not exist, nothing to disable\n");
+	} else if (status) {
+		dev_dbg(ice_pf_to_dev(vsi->back), "Failed to disable LAN Tx queues, error: %s\n",
+			ice_stat_str(status));
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_fill_txq_meta - Prepare the Tx queue's meta data
+ * @vsi: VSI that ring belongs to
+ * @ring: ring that txq_meta will be based on
+ * @txq_meta: a helper struct that wraps Tx queue's information
+ *
+ * Set up a helper struct that will contain all the necessary fields that
+ * are needed for stopping Tx queue
+ */
+void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
+		  struct ice_txq_meta *txq_meta)
+{
+	u8 tc;
+
+	if (IS_ENABLED(CONFIG_DCB))
+		tc = ring->dcb_tc;
+	else
+		tc = 0;
+
+	txq_meta->q_id = ring->reg_idx;
+	txq_meta->q_teid = ring->txq_teid;
+	txq_meta->q_handle = ring->q_handle;
+	txq_meta->vsi_idx = vsi->idx;
+	txq_meta->tc = tc;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_base.h b/drivers/net/ethernet/intel/ice/ice_base.h
new file mode 100644
index 000000000..44efdb627
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_base.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_BASE_H_
+#define _ICE_BASE_H_
+
+#include "ice.h"
+
+int ice_setup_rx_ctx(struct ice_ring *ring);
+int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg);
+int
+ice_vsi_ctrl_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx, bool wait);
+int ice_vsi_wait_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
+int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi);
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
+int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
+		struct ice_aqc_add_tx_qgrp *qg_buf);
+void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+		     u16 rel_vmvf_num, struct ice_ring *ring,
+		     struct ice_txq_meta *txq_meta);
+void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
+		  struct ice_txq_meta *txq_meta);
+#endif /* _ICE_BASE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
new file mode 100644
index 000000000..ecdc467c4
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -0,0 +1,4409 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_sched.h"
+#include "ice_adminq_cmd.h"
+#include "ice_flow.h"
+
+#define ICE_PF_RESET_WAIT_COUNT	300
+
+/**
+ * ice_set_mac_type - Sets MAC type
+ * @hw: pointer to the HW structure
+ *
+ * This function sets the MAC type of the adapter based on the
+ * vendor ID and device ID stored in the HW structure.
+ */
+static enum ice_status ice_set_mac_type(struct ice_hw *hw)
+{
+	if (hw->vendor_id != PCI_VENDOR_ID_INTEL)
+		return ICE_ERR_DEVICE_NOT_SUPPORTED;
+
+	switch (hw->device_id) {
+	case ICE_DEV_ID_E810C_BACKPLANE:
+	case ICE_DEV_ID_E810C_QSFP:
+	case ICE_DEV_ID_E810C_SFP:
+	case ICE_DEV_ID_E810_XXV_BACKPLANE:
+	case ICE_DEV_ID_E810_XXV_QSFP:
+	case ICE_DEV_ID_E810_XXV_SFP:
+		hw->mac_type = ICE_MAC_E810;
+		break;
+	case ICE_DEV_ID_E823C_10G_BASE_T:
+	case ICE_DEV_ID_E823C_BACKPLANE:
+	case ICE_DEV_ID_E823C_QSFP:
+	case ICE_DEV_ID_E823C_SFP:
+	case ICE_DEV_ID_E823C_SGMII:
+	case ICE_DEV_ID_E822C_10G_BASE_T:
+	case ICE_DEV_ID_E822C_BACKPLANE:
+	case ICE_DEV_ID_E822C_QSFP:
+	case ICE_DEV_ID_E822C_SFP:
+	case ICE_DEV_ID_E822C_SGMII:
+	case ICE_DEV_ID_E822L_10G_BASE_T:
+	case ICE_DEV_ID_E822L_BACKPLANE:
+	case ICE_DEV_ID_E822L_SFP:
+	case ICE_DEV_ID_E822L_SGMII:
+	case ICE_DEV_ID_E823L_10G_BASE_T:
+	case ICE_DEV_ID_E823L_1GBE:
+	case ICE_DEV_ID_E823L_BACKPLANE:
+	case ICE_DEV_ID_E823L_QSFP:
+	case ICE_DEV_ID_E823L_SFP:
+		hw->mac_type = ICE_MAC_GENERIC;
+		break;
+	default:
+		hw->mac_type = ICE_MAC_UNKNOWN;
+		break;
+	}
+
+	ice_debug(hw, ICE_DBG_INIT, "mac_type: %d\n", hw->mac_type);
+	return 0;
+}
+
+/**
+ * ice_clear_pf_cfg - Clear PF configuration
+ * @hw: pointer to the hardware structure
+ *
+ * Clears any existing PF configuration (VSIs, VSI lists, switch rules, port
+ * configuration, flow director filters, etc.).
+ */
+enum ice_status ice_clear_pf_cfg(struct ice_hw *hw)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_clear_pf_cfg);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_manage_mac_read - manage MAC address read command
+ * @hw: pointer to the HW struct
+ * @buf: a virtual buffer to hold the manage MAC read response
+ * @buf_size: Size of the virtual buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function is used to return per PF station MAC address (0x0107).
+ * NOTE: Upon successful completion of this command, MAC address information
+ * is returned in user specified buffer. Please interpret user specified
+ * buffer as "manage_mac_read" response.
+ * Response such as various MAC addresses are stored in HW struct (port.mac)
+ * ice_discover_dev_caps is expected to be called before this function is
+ * called.
+ */
+static enum ice_status
+ice_aq_manage_mac_read(struct ice_hw *hw, void *buf, u16 buf_size,
+		       struct ice_sq_cd *cd)
+{
+	struct ice_aqc_manage_mac_read_resp *resp;
+	struct ice_aqc_manage_mac_read *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+	u16 flags;
+	u8 i;
+
+	cmd = &desc.params.mac_read;
+
+	if (buf_size < sizeof(*resp))
+		return ICE_ERR_BUF_TOO_SHORT;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_manage_mac_read);
+
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (status)
+		return status;
+
+	resp = (struct ice_aqc_manage_mac_read_resp *)buf;
+	flags = le16_to_cpu(cmd->flags) & ICE_AQC_MAN_MAC_READ_M;
+
+	if (!(flags & ICE_AQC_MAN_MAC_LAN_ADDR_VALID)) {
+		ice_debug(hw, ICE_DBG_LAN, "got invalid MAC address\n");
+		return ICE_ERR_CFG;
+	}
+
+	/* A single port can report up to two (LAN and WoL) addresses */
+	for (i = 0; i < cmd->num_addr; i++)
+		if (resp[i].addr_type == ICE_AQC_MAN_MAC_ADDR_TYPE_LAN) {
+			ether_addr_copy(hw->port_info->mac.lan_addr,
+					resp[i].mac_addr);
+			ether_addr_copy(hw->port_info->mac.perm_addr,
+					resp[i].mac_addr);
+			break;
+		}
+
+	return 0;
+}
+
+/**
+ * ice_aq_get_phy_caps - returns PHY capabilities
+ * @pi: port information structure
+ * @qual_mods: report qualified modules
+ * @report_mode: report mode capabilities
+ * @pcaps: structure for PHY capabilities to be filled
+ * @cd: pointer to command details structure or NULL
+ *
+ * Returns the various PHY capabilities supported on the Port (0x0600)
+ */
+enum ice_status
+ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
+		    struct ice_aqc_get_phy_caps_data *pcaps,
+		    struct ice_sq_cd *cd)
+{
+	struct ice_aqc_get_phy_caps *cmd;
+	u16 pcaps_size = sizeof(*pcaps);
+	struct ice_aq_desc desc;
+	enum ice_status status;
+	struct ice_hw *hw;
+
+	cmd = &desc.params.get_phy;
+
+	if (!pcaps || (report_mode & ~ICE_AQC_REPORT_MODE_M) || !pi)
+		return ICE_ERR_PARAM;
+	hw = pi->hw;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_phy_caps);
+
+	if (qual_mods)
+		cmd->param0 |= cpu_to_le16(ICE_AQC_GET_PHY_RQM);
+
+	cmd->param0 |= cpu_to_le16(report_mode);
+	status = ice_aq_send_cmd(hw, &desc, pcaps, pcaps_size, cd);
+
+	ice_debug(hw, ICE_DBG_LINK, "get phy caps - report_mode = 0x%x\n",
+		  report_mode);
+	ice_debug(hw, ICE_DBG_LINK, "	phy_type_low = 0x%llx\n",
+		  (unsigned long long)le64_to_cpu(pcaps->phy_type_low));
+	ice_debug(hw, ICE_DBG_LINK, "	phy_type_high = 0x%llx\n",
+		  (unsigned long long)le64_to_cpu(pcaps->phy_type_high));
+	ice_debug(hw, ICE_DBG_LINK, "	caps = 0x%x\n", pcaps->caps);
+	ice_debug(hw, ICE_DBG_LINK, "	low_power_ctrl_an = 0x%x\n",
+		  pcaps->low_power_ctrl_an);
+	ice_debug(hw, ICE_DBG_LINK, "	eee_cap = 0x%x\n", pcaps->eee_cap);
+	ice_debug(hw, ICE_DBG_LINK, "	eeer_value = 0x%x\n",
+		  pcaps->eeer_value);
+	ice_debug(hw, ICE_DBG_LINK, "	link_fec_options = 0x%x\n",
+		  pcaps->link_fec_options);
+	ice_debug(hw, ICE_DBG_LINK, "	module_compliance_enforcement = 0x%x\n",
+		  pcaps->module_compliance_enforcement);
+	ice_debug(hw, ICE_DBG_LINK, "   extended_compliance_code = 0x%x\n",
+		  pcaps->extended_compliance_code);
+	ice_debug(hw, ICE_DBG_LINK, "   module_type[0] = 0x%x\n",
+		  pcaps->module_type[0]);
+	ice_debug(hw, ICE_DBG_LINK, "   module_type[1] = 0x%x\n",
+		  pcaps->module_type[1]);
+	ice_debug(hw, ICE_DBG_LINK, "   module_type[2] = 0x%x\n",
+		  pcaps->module_type[2]);
+
+	if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP_MEDIA) {
+		pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low);
+		pi->phy.phy_type_high = le64_to_cpu(pcaps->phy_type_high);
+		memcpy(pi->phy.link_info.module_type, &pcaps->module_type,
+		       sizeof(pi->phy.link_info.module_type));
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_get_link_topo_handle - get link topology node return status
+ * @pi: port information structure
+ * @node_type: requested node type
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get link topology node return status for specified node type (0x06E0)
+ *
+ * Node type cage can be used to determine if cage is present. If AQC
+ * returns error (ENOENT), then no cage present. If no cage present, then
+ * connection type is backplane or BASE-T.
+ */
+static enum ice_status
+ice_aq_get_link_topo_handle(struct ice_port_info *pi, u8 node_type,
+			    struct ice_sq_cd *cd)
+{
+	struct ice_aqc_get_link_topo *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.get_link_topo;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo);
+
+	cmd->addr.node_type_ctx = (ICE_AQC_LINK_TOPO_NODE_CTX_PORT <<
+				   ICE_AQC_LINK_TOPO_NODE_CTX_S);
+
+	/* set node type */
+	cmd->addr.node_type_ctx |= (ICE_AQC_LINK_TOPO_NODE_TYPE_M & node_type);
+
+	return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_is_media_cage_present
+ * @pi: port information structure
+ *
+ * Returns true if media cage is present, else false. If no cage, then
+ * media type is backplane or BASE-T.
+ */
+static bool ice_is_media_cage_present(struct ice_port_info *pi)
+{
+	/* Node type cage can be used to determine if cage is present. If AQC
+	 * returns error (ENOENT), then no cage present. If no cage present then
+	 * connection type is backplane or BASE-T.
+	 */
+	return !ice_aq_get_link_topo_handle(pi,
+					    ICE_AQC_LINK_TOPO_NODE_TYPE_CAGE,
+					    NULL);
+}
+
+/**
+ * ice_get_media_type - Gets media type
+ * @pi: port information structure
+ */
+static enum ice_media_type ice_get_media_type(struct ice_port_info *pi)
+{
+	struct ice_link_status *hw_link_info;
+
+	if (!pi)
+		return ICE_MEDIA_UNKNOWN;
+
+	hw_link_info = &pi->phy.link_info;
+	if (hw_link_info->phy_type_low && hw_link_info->phy_type_high)
+		/* If more than one media type is selected, report unknown */
+		return ICE_MEDIA_UNKNOWN;
+
+	if (hw_link_info->phy_type_low) {
+		/* 1G SGMII is a special case where some DA cable PHYs
+		 * may show this as an option when it really shouldn't
+		 * be since SGMII is meant to be between a MAC and a PHY
+		 * in a backplane. Try to detect this case and handle it
+		 */
+		if (hw_link_info->phy_type_low == ICE_PHY_TYPE_LOW_1G_SGMII &&
+		    (hw_link_info->module_type[ICE_AQC_MOD_TYPE_IDENT] ==
+		    ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE ||
+		    hw_link_info->module_type[ICE_AQC_MOD_TYPE_IDENT] ==
+		    ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE))
+			return ICE_MEDIA_DA;
+
+		switch (hw_link_info->phy_type_low) {
+		case ICE_PHY_TYPE_LOW_1000BASE_SX:
+		case ICE_PHY_TYPE_LOW_1000BASE_LX:
+		case ICE_PHY_TYPE_LOW_10GBASE_SR:
+		case ICE_PHY_TYPE_LOW_10GBASE_LR:
+		case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
+		case ICE_PHY_TYPE_LOW_25GBASE_SR:
+		case ICE_PHY_TYPE_LOW_25GBASE_LR:
+		case ICE_PHY_TYPE_LOW_40GBASE_SR4:
+		case ICE_PHY_TYPE_LOW_40GBASE_LR4:
+		case ICE_PHY_TYPE_LOW_50GBASE_SR2:
+		case ICE_PHY_TYPE_LOW_50GBASE_LR2:
+		case ICE_PHY_TYPE_LOW_50GBASE_SR:
+		case ICE_PHY_TYPE_LOW_50GBASE_FR:
+		case ICE_PHY_TYPE_LOW_50GBASE_LR:
+		case ICE_PHY_TYPE_LOW_100GBASE_SR4:
+		case ICE_PHY_TYPE_LOW_100GBASE_LR4:
+		case ICE_PHY_TYPE_LOW_100GBASE_SR2:
+		case ICE_PHY_TYPE_LOW_100GBASE_DR:
+		case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC:
+		case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC:
+		case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC:
+		case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
+		case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
+		case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
+		case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
+		case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
+			return ICE_MEDIA_FIBER;
+		case ICE_PHY_TYPE_LOW_100BASE_TX:
+		case ICE_PHY_TYPE_LOW_1000BASE_T:
+		case ICE_PHY_TYPE_LOW_2500BASE_T:
+		case ICE_PHY_TYPE_LOW_5GBASE_T:
+		case ICE_PHY_TYPE_LOW_10GBASE_T:
+		case ICE_PHY_TYPE_LOW_25GBASE_T:
+			return ICE_MEDIA_BASET;
+		case ICE_PHY_TYPE_LOW_10G_SFI_DA:
+		case ICE_PHY_TYPE_LOW_25GBASE_CR:
+		case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
+		case ICE_PHY_TYPE_LOW_25GBASE_CR1:
+		case ICE_PHY_TYPE_LOW_40GBASE_CR4:
+		case ICE_PHY_TYPE_LOW_50GBASE_CR2:
+		case ICE_PHY_TYPE_LOW_50GBASE_CP:
+		case ICE_PHY_TYPE_LOW_100GBASE_CR4:
+		case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
+		case ICE_PHY_TYPE_LOW_100GBASE_CP2:
+			return ICE_MEDIA_DA;
+		case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
+		case ICE_PHY_TYPE_LOW_40G_XLAUI:
+		case ICE_PHY_TYPE_LOW_50G_LAUI2:
+		case ICE_PHY_TYPE_LOW_50G_AUI2:
+		case ICE_PHY_TYPE_LOW_50G_AUI1:
+		case ICE_PHY_TYPE_LOW_100G_AUI4:
+		case ICE_PHY_TYPE_LOW_100G_CAUI4:
+			if (ice_is_media_cage_present(pi))
+				return ICE_MEDIA_DA;
+			fallthrough;
+		case ICE_PHY_TYPE_LOW_1000BASE_KX:
+		case ICE_PHY_TYPE_LOW_2500BASE_KX:
+		case ICE_PHY_TYPE_LOW_2500BASE_X:
+		case ICE_PHY_TYPE_LOW_5GBASE_KR:
+		case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
+		case ICE_PHY_TYPE_LOW_25GBASE_KR:
+		case ICE_PHY_TYPE_LOW_25GBASE_KR1:
+		case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
+		case ICE_PHY_TYPE_LOW_40GBASE_KR4:
+		case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
+		case ICE_PHY_TYPE_LOW_50GBASE_KR2:
+		case ICE_PHY_TYPE_LOW_100GBASE_KR4:
+		case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
+			return ICE_MEDIA_BACKPLANE;
+		}
+	} else {
+		switch (hw_link_info->phy_type_high) {
+		case ICE_PHY_TYPE_HIGH_100G_AUI2:
+		case ICE_PHY_TYPE_HIGH_100G_CAUI2:
+			if (ice_is_media_cage_present(pi))
+				return ICE_MEDIA_DA;
+			fallthrough;
+		case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
+			return ICE_MEDIA_BACKPLANE;
+		case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
+		case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
+			return ICE_MEDIA_FIBER;
+		}
+	}
+	return ICE_MEDIA_UNKNOWN;
+}
+
+/**
+ * ice_aq_get_link_info
+ * @pi: port information structure
+ * @ena_lse: enable/disable LinkStatusEvent reporting
+ * @link: pointer to link status structure - optional
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get Link Status (0x607). Returns the link status of the adapter.
+ */
+enum ice_status
+ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
+		     struct ice_link_status *link, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_get_link_status_data link_data = { 0 };
+	struct ice_aqc_get_link_status *resp;
+	struct ice_link_status *li_old, *li;
+	enum ice_media_type *hw_media_type;
+	struct ice_fc_info *hw_fc_info;
+	bool tx_pause, rx_pause;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+	struct ice_hw *hw;
+	u16 cmd_flags;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+	hw = pi->hw;
+	li_old = &pi->phy.link_info_old;
+	hw_media_type = &pi->phy.media_type;
+	li = &pi->phy.link_info;
+	hw_fc_info = &pi->fc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status);
+	cmd_flags = (ena_lse) ? ICE_AQ_LSE_ENA : ICE_AQ_LSE_DIS;
+	resp = &desc.params.get_link_status;
+	resp->cmd_flags = cpu_to_le16(cmd_flags);
+	resp->lport_num = pi->lport;
+
+	status = ice_aq_send_cmd(hw, &desc, &link_data, sizeof(link_data), cd);
+
+	if (status)
+		return status;
+
+	/* save off old link status information */
+	*li_old = *li;
+
+	/* update current link status information */
+	li->link_speed = le16_to_cpu(link_data.link_speed);
+	li->phy_type_low = le64_to_cpu(link_data.phy_type_low);
+	li->phy_type_high = le64_to_cpu(link_data.phy_type_high);
+	*hw_media_type = ice_get_media_type(pi);
+	li->link_info = link_data.link_info;
+	li->an_info = link_data.an_info;
+	li->ext_info = link_data.ext_info;
+	li->max_frame_size = le16_to_cpu(link_data.max_frame_size);
+	li->fec_info = link_data.cfg & ICE_AQ_FEC_MASK;
+	li->topo_media_conflict = link_data.topo_media_conflict;
+	li->pacing = link_data.cfg & (ICE_AQ_CFG_PACING_M |
+				      ICE_AQ_CFG_PACING_TYPE_M);
+
+	/* update fc info */
+	tx_pause = !!(link_data.an_info & ICE_AQ_LINK_PAUSE_TX);
+	rx_pause = !!(link_data.an_info & ICE_AQ_LINK_PAUSE_RX);
+	if (tx_pause && rx_pause)
+		hw_fc_info->current_mode = ICE_FC_FULL;
+	else if (tx_pause)
+		hw_fc_info->current_mode = ICE_FC_TX_PAUSE;
+	else if (rx_pause)
+		hw_fc_info->current_mode = ICE_FC_RX_PAUSE;
+	else
+		hw_fc_info->current_mode = ICE_FC_NONE;
+
+	li->lse_ena = !!(resp->cmd_flags & cpu_to_le16(ICE_AQ_LSE_IS_ENABLED));
+
+	ice_debug(hw, ICE_DBG_LINK, "get link info\n");
+	ice_debug(hw, ICE_DBG_LINK, "	link_speed = 0x%x\n", li->link_speed);
+	ice_debug(hw, ICE_DBG_LINK, "	phy_type_low = 0x%llx\n",
+		  (unsigned long long)li->phy_type_low);
+	ice_debug(hw, ICE_DBG_LINK, "	phy_type_high = 0x%llx\n",
+		  (unsigned long long)li->phy_type_high);
+	ice_debug(hw, ICE_DBG_LINK, "	media_type = 0x%x\n", *hw_media_type);
+	ice_debug(hw, ICE_DBG_LINK, "	link_info = 0x%x\n", li->link_info);
+	ice_debug(hw, ICE_DBG_LINK, "	an_info = 0x%x\n", li->an_info);
+	ice_debug(hw, ICE_DBG_LINK, "	ext_info = 0x%x\n", li->ext_info);
+	ice_debug(hw, ICE_DBG_LINK, "	fec_info = 0x%x\n", li->fec_info);
+	ice_debug(hw, ICE_DBG_LINK, "	lse_ena = 0x%x\n", li->lse_ena);
+	ice_debug(hw, ICE_DBG_LINK, "	max_frame = 0x%x\n",
+		  li->max_frame_size);
+	ice_debug(hw, ICE_DBG_LINK, "	pacing = 0x%x\n", li->pacing);
+
+	/* save link status information */
+	if (link)
+		*link = *li;
+
+	/* flag cleared so calling functions don't call AQ again */
+	pi->phy.get_link_info = false;
+
+	return 0;
+}
+
+/**
+ * ice_fill_tx_timer_and_fc_thresh
+ * @hw: pointer to the HW struct
+ * @cmd: pointer to MAC cfg structure
+ *
+ * Add Tx timer and FC refresh threshold info to Set MAC Config AQ command
+ * descriptor
+ */
+static void
+ice_fill_tx_timer_and_fc_thresh(struct ice_hw *hw,
+				struct ice_aqc_set_mac_cfg *cmd)
+{
+	u16 fc_thres_val, tx_timer_val;
+	u32 val;
+
+	/* We read back the transmit timer and FC threshold value of
+	 * LFC. Thus, we will use index =
+	 * PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_MAX_INDEX.
+	 *
+	 * Also, because we are operating on transmit timer and FC
+	 * threshold of LFC, we don't turn on any bit in tx_tmr_priority
+	 */
+#define IDX_OF_LFC PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_MAX_INDEX
+
+	/* Retrieve the transmit timer */
+	val = rd32(hw, PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA(IDX_OF_LFC));
+	tx_timer_val = val &
+		PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_HSEC_CTL_TX_PAUSE_QUANTA_M;
+	cmd->tx_tmr_value = cpu_to_le16(tx_timer_val);
+
+	/* Retrieve the FC threshold */
+	val = rd32(hw, PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(IDX_OF_LFC));
+	fc_thres_val = val & PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_M;
+
+	cmd->fc_refresh_threshold = cpu_to_le16(fc_thres_val);
+}
+
+/**
+ * ice_aq_set_mac_cfg
+ * @hw: pointer to the HW struct
+ * @max_frame_size: Maximum Frame Size to be supported
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set MAC configuration (0x0603)
+ */
+enum ice_status
+ice_aq_set_mac_cfg(struct ice_hw *hw, u16 max_frame_size, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_set_mac_cfg *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.set_mac_cfg;
+
+	if (max_frame_size == 0)
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_mac_cfg);
+
+	cmd->max_frame_size = cpu_to_le16(max_frame_size);
+
+	ice_fill_tx_timer_and_fc_thresh(hw, cmd);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_init_fltr_mgmt_struct - initializes filter management list and locks
+ * @hw: pointer to the HW struct
+ */
+static enum ice_status ice_init_fltr_mgmt_struct(struct ice_hw *hw)
+{
+	struct ice_switch_info *sw;
+	enum ice_status status;
+
+	hw->switch_info = devm_kzalloc(ice_hw_to_dev(hw),
+				       sizeof(*hw->switch_info), GFP_KERNEL);
+	sw = hw->switch_info;
+
+	if (!sw)
+		return ICE_ERR_NO_MEMORY;
+
+	INIT_LIST_HEAD(&sw->vsi_list_map_head);
+
+	status = ice_init_def_sw_recp(hw);
+	if (status) {
+		devm_kfree(ice_hw_to_dev(hw), hw->switch_info);
+		return status;
+	}
+	return 0;
+}
+
+/**
+ * ice_cleanup_fltr_mgmt_struct - cleanup filter management list and locks
+ * @hw: pointer to the HW struct
+ */
+static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_vsi_list_map_info *v_pos_map;
+	struct ice_vsi_list_map_info *v_tmp_map;
+	struct ice_sw_recipe *recps;
+	u8 i;
+
+	list_for_each_entry_safe(v_pos_map, v_tmp_map, &sw->vsi_list_map_head,
+				 list_entry) {
+		list_del(&v_pos_map->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), v_pos_map);
+	}
+	recps = hw->switch_info->recp_list;
+	for (i = 0; i < ICE_SW_LKUP_LAST; i++) {
+		struct ice_fltr_mgmt_list_entry *lst_itr, *tmp_entry;
+
+		recps[i].root_rid = i;
+		mutex_destroy(&recps[i].filt_rule_lock);
+		list_for_each_entry_safe(lst_itr, tmp_entry,
+					 &recps[i].filt_rules, list_entry) {
+			list_del(&lst_itr->list_entry);
+			devm_kfree(ice_hw_to_dev(hw), lst_itr);
+		}
+	}
+	ice_rm_all_sw_replay_rule_info(hw);
+	devm_kfree(ice_hw_to_dev(hw), sw->recp_list);
+	devm_kfree(ice_hw_to_dev(hw), sw);
+}
+
+/**
+ * ice_get_fw_log_cfg - get FW logging configuration
+ * @hw: pointer to the HW struct
+ */
+static enum ice_status ice_get_fw_log_cfg(struct ice_hw *hw)
+{
+	struct ice_aq_desc desc;
+	enum ice_status status;
+	__le16 *config;
+	u16 size;
+
+	size = sizeof(*config) * ICE_AQC_FW_LOG_ID_MAX;
+	config = devm_kzalloc(ice_hw_to_dev(hw), size, GFP_KERNEL);
+	if (!config)
+		return ICE_ERR_NO_MEMORY;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logging_info);
+
+	status = ice_aq_send_cmd(hw, &desc, config, size, NULL);
+	if (!status) {
+		u16 i;
+
+		/* Save FW logging information into the HW structure */
+		for (i = 0; i < ICE_AQC_FW_LOG_ID_MAX; i++) {
+			u16 v, m, flgs;
+
+			v = le16_to_cpu(config[i]);
+			m = (v & ICE_AQC_FW_LOG_ID_M) >> ICE_AQC_FW_LOG_ID_S;
+			flgs = (v & ICE_AQC_FW_LOG_EN_M) >> ICE_AQC_FW_LOG_EN_S;
+
+			if (m < ICE_AQC_FW_LOG_ID_MAX)
+				hw->fw_log.evnts[m].cur = flgs;
+		}
+	}
+
+	devm_kfree(ice_hw_to_dev(hw), config);
+
+	return status;
+}
+
+/**
+ * ice_cfg_fw_log - configure FW logging
+ * @hw: pointer to the HW struct
+ * @enable: enable certain FW logging events if true, disable all if false
+ *
+ * This function enables/disables the FW logging via Rx CQ events and a UART
+ * port based on predetermined configurations. FW logging via the Rx CQ can be
+ * enabled/disabled for individual PF's. However, FW logging via the UART can
+ * only be enabled/disabled for all PFs on the same device.
+ *
+ * To enable overall FW logging, the "cq_en" and "uart_en" enable bits in
+ * hw->fw_log need to be set accordingly, e.g. based on user-provided input,
+ * before initializing the device.
+ *
+ * When re/configuring FW logging, callers need to update the "cfg" elements of
+ * the hw->fw_log.evnts array with the desired logging event configurations for
+ * modules of interest. When disabling FW logging completely, the callers can
+ * just pass false in the "enable" parameter. On completion, the function will
+ * update the "cur" element of the hw->fw_log.evnts array with the resulting
+ * logging event configurations of the modules that are being re/configured. FW
+ * logging modules that are not part of a reconfiguration operation retain their
+ * previous states.
+ *
+ * Before resetting the device, it is recommended that the driver disables FW
+ * logging before shutting down the control queue. When disabling FW logging
+ * ("enable" = false), the latest configurations of FW logging events stored in
+ * hw->fw_log.evnts[] are not overridden to allow them to be reconfigured after
+ * a device reset.
+ *
+ * When enabling FW logging to emit log messages via the Rx CQ during the
+ * device's initialization phase, a mechanism alternative to interrupt handlers
+ * needs to be used to extract FW log messages from the Rx CQ periodically and
+ * to prevent the Rx CQ from being full and stalling other types of control
+ * messages from FW to SW. Interrupts are typically disabled during the device's
+ * initialization phase.
+ */
+static enum ice_status ice_cfg_fw_log(struct ice_hw *hw, bool enable)
+{
+	struct ice_aqc_fw_logging *cmd;
+	enum ice_status status = 0;
+	u16 i, chgs = 0, len = 0;
+	struct ice_aq_desc desc;
+	__le16 *data = NULL;
+	u8 actv_evnts = 0;
+	void *buf = NULL;
+
+	if (!hw->fw_log.cq_en && !hw->fw_log.uart_en)
+		return 0;
+
+	/* Disable FW logging only when the control queue is still responsive */
+	if (!enable &&
+	    (!hw->fw_log.actv_evnts || !ice_check_sq_alive(hw, &hw->adminq)))
+		return 0;
+
+	/* Get current FW log settings */
+	status = ice_get_fw_log_cfg(hw);
+	if (status)
+		return status;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logging);
+	cmd = &desc.params.fw_logging;
+
+	/* Indicate which controls are valid */
+	if (hw->fw_log.cq_en)
+		cmd->log_ctrl_valid |= ICE_AQC_FW_LOG_AQ_VALID;
+
+	if (hw->fw_log.uart_en)
+		cmd->log_ctrl_valid |= ICE_AQC_FW_LOG_UART_VALID;
+
+	if (enable) {
+		/* Fill in an array of entries with FW logging modules and
+		 * logging events being reconfigured.
+		 */
+		for (i = 0; i < ICE_AQC_FW_LOG_ID_MAX; i++) {
+			u16 val;
+
+			/* Keep track of enabled event types */
+			actv_evnts |= hw->fw_log.evnts[i].cfg;
+
+			if (hw->fw_log.evnts[i].cfg == hw->fw_log.evnts[i].cur)
+				continue;
+
+			if (!data) {
+				data = devm_kcalloc(ice_hw_to_dev(hw),
+						    ICE_AQC_FW_LOG_ID_MAX,
+						    sizeof(*data),
+						    GFP_KERNEL);
+				if (!data)
+					return ICE_ERR_NO_MEMORY;
+			}
+
+			val = i << ICE_AQC_FW_LOG_ID_S;
+			val |= hw->fw_log.evnts[i].cfg << ICE_AQC_FW_LOG_EN_S;
+			data[chgs++] = cpu_to_le16(val);
+		}
+
+		/* Only enable FW logging if at least one module is specified.
+		 * If FW logging is currently enabled but all modules are not
+		 * enabled to emit log messages, disable FW logging altogether.
+		 */
+		if (actv_evnts) {
+			/* Leave if there is effectively no change */
+			if (!chgs)
+				goto out;
+
+			if (hw->fw_log.cq_en)
+				cmd->log_ctrl |= ICE_AQC_FW_LOG_AQ_EN;
+
+			if (hw->fw_log.uart_en)
+				cmd->log_ctrl |= ICE_AQC_FW_LOG_UART_EN;
+
+			buf = data;
+			len = sizeof(*data) * chgs;
+			desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+		}
+	}
+
+	status = ice_aq_send_cmd(hw, &desc, buf, len, NULL);
+	if (!status) {
+		/* Update the current configuration to reflect events enabled.
+		 * hw->fw_log.cq_en and hw->fw_log.uart_en indicate if the FW
+		 * logging mode is enabled for the device. They do not reflect
+		 * actual modules being enabled to emit log messages. So, their
+		 * values remain unchanged even when all modules are disabled.
+		 */
+		u16 cnt = enable ? chgs : (u16)ICE_AQC_FW_LOG_ID_MAX;
+
+		hw->fw_log.actv_evnts = actv_evnts;
+		for (i = 0; i < cnt; i++) {
+			u16 v, m;
+
+			if (!enable) {
+				/* When disabling all FW logging events as part
+				 * of device's de-initialization, the original
+				 * configurations are retained, and can be used
+				 * to reconfigure FW logging later if the device
+				 * is re-initialized.
+				 */
+				hw->fw_log.evnts[i].cur = 0;
+				continue;
+			}
+
+			v = le16_to_cpu(data[i]);
+			m = (v & ICE_AQC_FW_LOG_ID_M) >> ICE_AQC_FW_LOG_ID_S;
+			hw->fw_log.evnts[m].cur = hw->fw_log.evnts[m].cfg;
+		}
+	}
+
+out:
+	if (data)
+		devm_kfree(ice_hw_to_dev(hw), data);
+
+	return status;
+}
+
+/**
+ * ice_output_fw_log
+ * @hw: pointer to the HW struct
+ * @desc: pointer to the AQ message descriptor
+ * @buf: pointer to the buffer accompanying the AQ message
+ *
+ * Formats a FW Log message and outputs it via the standard driver logs.
+ */
+void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf)
+{
+	ice_debug(hw, ICE_DBG_FW_LOG, "[ FW Log Msg Start ]\n");
+	ice_debug_array(hw, ICE_DBG_FW_LOG, 16, 1, (u8 *)buf,
+			le16_to_cpu(desc->datalen));
+	ice_debug(hw, ICE_DBG_FW_LOG, "[ FW Log Msg End ]\n");
+}
+
+/**
+ * ice_get_itr_intrl_gran
+ * @hw: pointer to the HW struct
+ *
+ * Determines the ITR/INTRL granularities based on the maximum aggregate
+ * bandwidth according to the device's configuration during power-on.
+ */
+static void ice_get_itr_intrl_gran(struct ice_hw *hw)
+{
+	u8 max_agg_bw = (rd32(hw, GL_PWR_MODE_CTL) &
+			 GL_PWR_MODE_CTL_CAR_MAX_BW_M) >>
+			GL_PWR_MODE_CTL_CAR_MAX_BW_S;
+
+	switch (max_agg_bw) {
+	case ICE_MAX_AGG_BW_200G:
+	case ICE_MAX_AGG_BW_100G:
+	case ICE_MAX_AGG_BW_50G:
+		hw->itr_gran = ICE_ITR_GRAN_ABOVE_25;
+		hw->intrl_gran = ICE_INTRL_GRAN_ABOVE_25;
+		break;
+	case ICE_MAX_AGG_BW_25G:
+		hw->itr_gran = ICE_ITR_GRAN_MAX_25;
+		hw->intrl_gran = ICE_INTRL_GRAN_MAX_25;
+		break;
+	}
+}
+
+/**
+ * ice_init_hw - main hardware initialization routine
+ * @hw: pointer to the hardware structure
+ */
+enum ice_status ice_init_hw(struct ice_hw *hw)
+{
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	enum ice_status status;
+	u16 mac_buf_len;
+	void *mac_buf;
+
+	/* Set MAC type based on DeviceID */
+	status = ice_set_mac_type(hw);
+	if (status)
+		return status;
+
+	hw->pf_id = (u8)(rd32(hw, PF_FUNC_RID) &
+			 PF_FUNC_RID_FUNC_NUM_M) >>
+		PF_FUNC_RID_FUNC_NUM_S;
+
+	status = ice_reset(hw, ICE_RESET_PFR);
+	if (status)
+		return status;
+
+	ice_get_itr_intrl_gran(hw);
+
+	status = ice_create_all_ctrlq(hw);
+	if (status)
+		goto err_unroll_cqinit;
+
+	/* Enable FW logging. Not fatal if this fails. */
+	status = ice_cfg_fw_log(hw, true);
+	if (status)
+		ice_debug(hw, ICE_DBG_INIT, "Failed to enable FW logging.\n");
+
+	status = ice_clear_pf_cfg(hw);
+	if (status)
+		goto err_unroll_cqinit;
+
+	/* Set bit to enable Flow Director filters */
+	wr32(hw, PFQF_FD_ENA, PFQF_FD_ENA_FD_ENA_M);
+	INIT_LIST_HEAD(&hw->fdir_list_head);
+
+	ice_clear_pxe_mode(hw);
+
+	status = ice_init_nvm(hw);
+	if (status)
+		goto err_unroll_cqinit;
+
+	status = ice_get_caps(hw);
+	if (status)
+		goto err_unroll_cqinit;
+
+	hw->port_info = devm_kzalloc(ice_hw_to_dev(hw),
+				     sizeof(*hw->port_info), GFP_KERNEL);
+	if (!hw->port_info) {
+		status = ICE_ERR_NO_MEMORY;
+		goto err_unroll_cqinit;
+	}
+
+	/* set the back pointer to HW */
+	hw->port_info->hw = hw;
+
+	/* Initialize port_info struct with switch configuration data */
+	status = ice_get_initial_sw_cfg(hw);
+	if (status)
+		goto err_unroll_alloc;
+
+	hw->evb_veb = true;
+
+	/* Query the allocated resources for Tx scheduler */
+	status = ice_sched_query_res_alloc(hw);
+	if (status) {
+		ice_debug(hw, ICE_DBG_SCHED,
+			  "Failed to get scheduler allocated resources\n");
+		goto err_unroll_alloc;
+	}
+
+	/* Initialize port_info struct with scheduler data */
+	status = ice_sched_init_port(hw->port_info);
+	if (status)
+		goto err_unroll_sched;
+
+	pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps) {
+		status = ICE_ERR_NO_MEMORY;
+		goto err_unroll_sched;
+	}
+
+	/* Initialize port_info struct with PHY capabilities */
+	status = ice_aq_get_phy_caps(hw->port_info, false,
+				     ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps,
+				     NULL);
+	devm_kfree(ice_hw_to_dev(hw), pcaps);
+	if (status)
+		goto err_unroll_sched;
+
+	/* Initialize port_info struct with link information */
+	status = ice_aq_get_link_info(hw->port_info, false, NULL, NULL);
+	if (status)
+		goto err_unroll_sched;
+
+	/* need a valid SW entry point to build a Tx tree */
+	if (!hw->sw_entry_point_layer) {
+		ice_debug(hw, ICE_DBG_SCHED, "invalid sw entry point\n");
+		status = ICE_ERR_CFG;
+		goto err_unroll_sched;
+	}
+	INIT_LIST_HEAD(&hw->agg_list);
+	/* Initialize max burst size */
+	if (!hw->max_burst_size)
+		ice_cfg_rl_burst_size(hw, ICE_SCHED_DFLT_BURST_SIZE);
+
+	status = ice_init_fltr_mgmt_struct(hw);
+	if (status)
+		goto err_unroll_sched;
+
+	/* Get MAC information */
+	/* A single port can report up to two (LAN and WoL) addresses */
+	mac_buf = devm_kcalloc(ice_hw_to_dev(hw), 2,
+			       sizeof(struct ice_aqc_manage_mac_read_resp),
+			       GFP_KERNEL);
+	mac_buf_len = 2 * sizeof(struct ice_aqc_manage_mac_read_resp);
+
+	if (!mac_buf) {
+		status = ICE_ERR_NO_MEMORY;
+		goto err_unroll_fltr_mgmt_struct;
+	}
+
+	status = ice_aq_manage_mac_read(hw, mac_buf, mac_buf_len, NULL);
+	devm_kfree(ice_hw_to_dev(hw), mac_buf);
+
+	if (status)
+		goto err_unroll_fltr_mgmt_struct;
+	/* enable jumbo frame support at MAC level */
+	status = ice_aq_set_mac_cfg(hw, ICE_AQ_SET_MAC_FRAME_SIZE_MAX, NULL);
+	if (status)
+		goto err_unroll_fltr_mgmt_struct;
+	/* Obtain counter base index which would be used by flow director */
+	status = ice_alloc_fd_res_cntr(hw, &hw->fd_ctr_base);
+	if (status)
+		goto err_unroll_fltr_mgmt_struct;
+	status = ice_init_hw_tbls(hw);
+	if (status)
+		goto err_unroll_fltr_mgmt_struct;
+	mutex_init(&hw->tnl_lock);
+	return 0;
+
+err_unroll_fltr_mgmt_struct:
+	ice_cleanup_fltr_mgmt_struct(hw);
+err_unroll_sched:
+	ice_sched_cleanup_all(hw);
+err_unroll_alloc:
+	devm_kfree(ice_hw_to_dev(hw), hw->port_info);
+err_unroll_cqinit:
+	ice_destroy_all_ctrlq(hw);
+	return status;
+}
+
+/**
+ * ice_deinit_hw - unroll initialization operations done by ice_init_hw
+ * @hw: pointer to the hardware structure
+ *
+ * This should be called only during nominal operation, not as a result of
+ * ice_init_hw() failing since ice_init_hw() will take care of unrolling
+ * applicable initializations if it fails for any reason.
+ */
+void ice_deinit_hw(struct ice_hw *hw)
+{
+	ice_free_fd_res_cntr(hw, hw->fd_ctr_base);
+	ice_cleanup_fltr_mgmt_struct(hw);
+
+	ice_sched_cleanup_all(hw);
+	ice_sched_clear_agg(hw);
+	ice_free_seg(hw);
+	ice_free_hw_tbls(hw);
+	mutex_destroy(&hw->tnl_lock);
+
+	if (hw->port_info) {
+		devm_kfree(ice_hw_to_dev(hw), hw->port_info);
+		hw->port_info = NULL;
+	}
+
+	/* Attempt to disable FW logging before shutting down control queues */
+	ice_cfg_fw_log(hw, false);
+	ice_destroy_all_ctrlq(hw);
+
+	/* Clear VSI contexts if not already cleared */
+	ice_clear_all_vsi_ctx(hw);
+}
+
+/**
+ * ice_check_reset - Check to see if a global reset is complete
+ * @hw: pointer to the hardware structure
+ */
+enum ice_status ice_check_reset(struct ice_hw *hw)
+{
+	u32 cnt, reg = 0, grst_timeout, uld_mask;
+
+	/* Poll for Device Active state in case a recent CORER, GLOBR,
+	 * or EMPR has occurred. The grst delay value is in 100ms units.
+	 * Add 1sec for outstanding AQ commands that can take a long time.
+	 */
+	grst_timeout = ((rd32(hw, GLGEN_RSTCTL) & GLGEN_RSTCTL_GRSTDEL_M) >>
+			GLGEN_RSTCTL_GRSTDEL_S) + 10;
+
+	for (cnt = 0; cnt < grst_timeout; cnt++) {
+		mdelay(100);
+		reg = rd32(hw, GLGEN_RSTAT);
+		if (!(reg & GLGEN_RSTAT_DEVSTATE_M))
+			break;
+	}
+
+	if (cnt == grst_timeout) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Global reset polling failed to complete.\n");
+		return ICE_ERR_RESET_FAILED;
+	}
+
+#define ICE_RESET_DONE_MASK	(GLNVM_ULD_PCIER_DONE_M |\
+				 GLNVM_ULD_PCIER_DONE_1_M |\
+				 GLNVM_ULD_CORER_DONE_M |\
+				 GLNVM_ULD_GLOBR_DONE_M |\
+				 GLNVM_ULD_POR_DONE_M |\
+				 GLNVM_ULD_POR_DONE_1_M |\
+				 GLNVM_ULD_PCIER_DONE_2_M)
+
+	uld_mask = ICE_RESET_DONE_MASK;
+
+	/* Device is Active; check Global Reset processes are done */
+	for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) {
+		reg = rd32(hw, GLNVM_ULD) & uld_mask;
+		if (reg == uld_mask) {
+			ice_debug(hw, ICE_DBG_INIT,
+				  "Global reset processes done. %d\n", cnt);
+			break;
+		}
+		mdelay(10);
+	}
+
+	if (cnt == ICE_PF_RESET_WAIT_COUNT) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Wait for Reset Done timed out. GLNVM_ULD = 0x%x\n",
+			  reg);
+		return ICE_ERR_RESET_FAILED;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_pf_reset - Reset the PF
+ * @hw: pointer to the hardware structure
+ *
+ * If a global reset has been triggered, this function checks
+ * for its completion and then issues the PF reset
+ */
+static enum ice_status ice_pf_reset(struct ice_hw *hw)
+{
+	u32 cnt, reg;
+
+	/* If at function entry a global reset was already in progress, i.e.
+	 * state is not 'device active' or any of the reset done bits are not
+	 * set in GLNVM_ULD, there is no need for a PF Reset; poll until the
+	 * global reset is done.
+	 */
+	if ((rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_DEVSTATE_M) ||
+	    (rd32(hw, GLNVM_ULD) & ICE_RESET_DONE_MASK) ^ ICE_RESET_DONE_MASK) {
+		/* poll on global reset currently in progress until done */
+		if (ice_check_reset(hw))
+			return ICE_ERR_RESET_FAILED;
+
+		return 0;
+	}
+
+	/* Reset the PF */
+	reg = rd32(hw, PFGEN_CTRL);
+
+	wr32(hw, PFGEN_CTRL, (reg | PFGEN_CTRL_PFSWR_M));
+
+	/* Wait for the PFR to complete. The wait time is the global config lock
+	 * timeout plus the PFR timeout which will account for a possible reset
+	 * that is occurring during a download package operation.
+	 */
+	for (cnt = 0; cnt < ICE_GLOBAL_CFG_LOCK_TIMEOUT +
+	     ICE_PF_RESET_WAIT_COUNT; cnt++) {
+		reg = rd32(hw, PFGEN_CTRL);
+		if (!(reg & PFGEN_CTRL_PFSWR_M))
+			break;
+
+		mdelay(1);
+	}
+
+	if (cnt == ICE_PF_RESET_WAIT_COUNT) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "PF reset polling failed to complete.\n");
+		return ICE_ERR_RESET_FAILED;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_reset - Perform different types of reset
+ * @hw: pointer to the hardware structure
+ * @req: reset request
+ *
+ * This function triggers a reset as specified by the req parameter.
+ *
+ * Note:
+ * If anything other than a PF reset is triggered, PXE mode is restored.
+ * This has to be cleared using ice_clear_pxe_mode again, once the AQ
+ * interface has been restored in the rebuild flow.
+ */
+enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req)
+{
+	u32 val = 0;
+
+	switch (req) {
+	case ICE_RESET_PFR:
+		return ice_pf_reset(hw);
+	case ICE_RESET_CORER:
+		ice_debug(hw, ICE_DBG_INIT, "CoreR requested\n");
+		val = GLGEN_RTRIG_CORER_M;
+		break;
+	case ICE_RESET_GLOBR:
+		ice_debug(hw, ICE_DBG_INIT, "GlobalR requested\n");
+		val = GLGEN_RTRIG_GLOBR_M;
+		break;
+	default:
+		return ICE_ERR_PARAM;
+	}
+
+	val |= rd32(hw, GLGEN_RTRIG);
+	wr32(hw, GLGEN_RTRIG, val);
+	ice_flush(hw);
+
+	/* wait for the FW to be ready */
+	return ice_check_reset(hw);
+}
+
+/**
+ * ice_copy_rxq_ctx_to_hw
+ * @hw: pointer to the hardware structure
+ * @ice_rxq_ctx: pointer to the rxq context
+ * @rxq_index: the index of the Rx queue
+ *
+ * Copies rxq context from dense structure to HW register space
+ */
+static enum ice_status
+ice_copy_rxq_ctx_to_hw(struct ice_hw *hw, u8 *ice_rxq_ctx, u32 rxq_index)
+{
+	u8 i;
+
+	if (!ice_rxq_ctx)
+		return ICE_ERR_BAD_PTR;
+
+	if (rxq_index > QRX_CTRL_MAX_INDEX)
+		return ICE_ERR_PARAM;
+
+	/* Copy each dword separately to HW */
+	for (i = 0; i < ICE_RXQ_CTX_SIZE_DWORDS; i++) {
+		wr32(hw, QRX_CONTEXT(i, rxq_index),
+		     *((u32 *)(ice_rxq_ctx + (i * sizeof(u32)))));
+
+		ice_debug(hw, ICE_DBG_QCTX, "qrxdata[%d]: %08X\n", i,
+			  *((u32 *)(ice_rxq_ctx + (i * sizeof(u32)))));
+	}
+
+	return 0;
+}
+
+/* LAN Rx Queue Context */
+static const struct ice_ctx_ele ice_rlan_ctx_info[] = {
+	/* Field		Width	LSB */
+	ICE_CTX_STORE(ice_rlan_ctx, head,		13,	0),
+	ICE_CTX_STORE(ice_rlan_ctx, cpuid,		8,	13),
+	ICE_CTX_STORE(ice_rlan_ctx, base,		57,	32),
+	ICE_CTX_STORE(ice_rlan_ctx, qlen,		13,	89),
+	ICE_CTX_STORE(ice_rlan_ctx, dbuf,		7,	102),
+	ICE_CTX_STORE(ice_rlan_ctx, hbuf,		5,	109),
+	ICE_CTX_STORE(ice_rlan_ctx, dtype,		2,	114),
+	ICE_CTX_STORE(ice_rlan_ctx, dsize,		1,	116),
+	ICE_CTX_STORE(ice_rlan_ctx, crcstrip,		1,	117),
+	ICE_CTX_STORE(ice_rlan_ctx, l2tsel,		1,	119),
+	ICE_CTX_STORE(ice_rlan_ctx, hsplit_0,		4,	120),
+	ICE_CTX_STORE(ice_rlan_ctx, hsplit_1,		2,	124),
+	ICE_CTX_STORE(ice_rlan_ctx, showiv,		1,	127),
+	ICE_CTX_STORE(ice_rlan_ctx, rxmax,		14,	174),
+	ICE_CTX_STORE(ice_rlan_ctx, tphrdesc_ena,	1,	193),
+	ICE_CTX_STORE(ice_rlan_ctx, tphwdesc_ena,	1,	194),
+	ICE_CTX_STORE(ice_rlan_ctx, tphdata_ena,	1,	195),
+	ICE_CTX_STORE(ice_rlan_ctx, tphhead_ena,	1,	196),
+	ICE_CTX_STORE(ice_rlan_ctx, lrxqthresh,		3,	198),
+	ICE_CTX_STORE(ice_rlan_ctx, prefena,		1,	201),
+	{ 0 }
+};
+
+/**
+ * ice_write_rxq_ctx
+ * @hw: pointer to the hardware structure
+ * @rlan_ctx: pointer to the rxq context
+ * @rxq_index: the index of the Rx queue
+ *
+ * Converts rxq context from sparse to dense structure and then writes
+ * it to HW register space and enables the hardware to prefetch descriptors
+ * instead of only fetching them on demand
+ */
+enum ice_status
+ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
+		  u32 rxq_index)
+{
+	u8 ctx_buf[ICE_RXQ_CTX_SZ] = { 0 };
+
+	if (!rlan_ctx)
+		return ICE_ERR_BAD_PTR;
+
+	rlan_ctx->prefena = 1;
+
+	ice_set_ctx(hw, (u8 *)rlan_ctx, ctx_buf, ice_rlan_ctx_info);
+	return ice_copy_rxq_ctx_to_hw(hw, ctx_buf, rxq_index);
+}
+
+/* LAN Tx Queue Context */
+const struct ice_ctx_ele ice_tlan_ctx_info[] = {
+				    /* Field			Width	LSB */
+	ICE_CTX_STORE(ice_tlan_ctx, base,			57,	0),
+	ICE_CTX_STORE(ice_tlan_ctx, port_num,			3,	57),
+	ICE_CTX_STORE(ice_tlan_ctx, cgd_num,			5,	60),
+	ICE_CTX_STORE(ice_tlan_ctx, pf_num,			3,	65),
+	ICE_CTX_STORE(ice_tlan_ctx, vmvf_num,			10,	68),
+	ICE_CTX_STORE(ice_tlan_ctx, vmvf_type,			2,	78),
+	ICE_CTX_STORE(ice_tlan_ctx, src_vsi,			10,	80),
+	ICE_CTX_STORE(ice_tlan_ctx, tsyn_ena,			1,	90),
+	ICE_CTX_STORE(ice_tlan_ctx, internal_usage_flag,	1,	91),
+	ICE_CTX_STORE(ice_tlan_ctx, alt_vlan,			1,	92),
+	ICE_CTX_STORE(ice_tlan_ctx, cpuid,			8,	93),
+	ICE_CTX_STORE(ice_tlan_ctx, wb_mode,			1,	101),
+	ICE_CTX_STORE(ice_tlan_ctx, tphrd_desc,			1,	102),
+	ICE_CTX_STORE(ice_tlan_ctx, tphrd,			1,	103),
+	ICE_CTX_STORE(ice_tlan_ctx, tphwr_desc,			1,	104),
+	ICE_CTX_STORE(ice_tlan_ctx, cmpq_id,			9,	105),
+	ICE_CTX_STORE(ice_tlan_ctx, qnum_in_func,		14,	114),
+	ICE_CTX_STORE(ice_tlan_ctx, itr_notification_mode,	1,	128),
+	ICE_CTX_STORE(ice_tlan_ctx, adjust_prof_id,		6,	129),
+	ICE_CTX_STORE(ice_tlan_ctx, qlen,			13,	135),
+	ICE_CTX_STORE(ice_tlan_ctx, quanta_prof_idx,		4,	148),
+	ICE_CTX_STORE(ice_tlan_ctx, tso_ena,			1,	152),
+	ICE_CTX_STORE(ice_tlan_ctx, tso_qnum,			11,	153),
+	ICE_CTX_STORE(ice_tlan_ctx, legacy_int,			1,	164),
+	ICE_CTX_STORE(ice_tlan_ctx, drop_ena,			1,	165),
+	ICE_CTX_STORE(ice_tlan_ctx, cache_prof_idx,		2,	166),
+	ICE_CTX_STORE(ice_tlan_ctx, pkt_shaper_prof_idx,	3,	168),
+	ICE_CTX_STORE(ice_tlan_ctx, int_q_state,		122,	171),
+	{ 0 }
+};
+
+/* FW Admin Queue command wrappers */
+
+/* Software lock/mutex that is meant to be held while the Global Config Lock
+ * in firmware is acquired by the software to prevent most (but not all) types
+ * of AQ commands from being sent to FW
+ */
+DEFINE_MUTEX(ice_global_cfg_lock_sw);
+
+/**
+ * ice_aq_send_cmd - send FW Admin Queue command to FW Admin Queue
+ * @hw: pointer to the HW struct
+ * @desc: descriptor describing the command
+ * @buf: buffer to use for indirect commands (NULL for direct commands)
+ * @buf_size: size of buffer for indirect commands (0 for direct commands)
+ * @cd: pointer to command details structure
+ *
+ * Helper function to send FW Admin Queue commands to the FW Admin Queue.
+ */
+enum ice_status
+ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf,
+		u16 buf_size, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_req_res *cmd = &desc->params.res_owner;
+	bool lock_acquired = false;
+	enum ice_status status;
+
+	/* When a package download is in process (i.e. when the firmware's
+	 * Global Configuration Lock resource is held), only the Download
+	 * Package, Get Version, Get Package Info List and Release Resource
+	 * (with resource ID set to Global Config Lock) AdminQ commands are
+	 * allowed; all others must block until the package download completes
+	 * and the Global Config Lock is released.  See also
+	 * ice_acquire_global_cfg_lock().
+	 */
+	switch (le16_to_cpu(desc->opcode)) {
+	case ice_aqc_opc_download_pkg:
+	case ice_aqc_opc_get_pkg_info_list:
+	case ice_aqc_opc_get_ver:
+		break;
+	case ice_aqc_opc_release_res:
+		if (le16_to_cpu(cmd->res_id) == ICE_AQC_RES_ID_GLBL_LOCK)
+			break;
+		fallthrough;
+	default:
+		mutex_lock(&ice_global_cfg_lock_sw);
+		lock_acquired = true;
+		break;
+	}
+
+	status = ice_sq_send_cmd(hw, &hw->adminq, desc, buf, buf_size, cd);
+	if (lock_acquired)
+		mutex_unlock(&ice_global_cfg_lock_sw);
+
+	return status;
+}
+
+/**
+ * ice_aq_get_fw_ver
+ * @hw: pointer to the HW struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get the firmware version (0x0001) from the admin queue commands
+ */
+enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_get_ver *resp;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	resp = &desc.params.get_ver;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_ver);
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+
+	if (!status) {
+		hw->fw_branch = resp->fw_branch;
+		hw->fw_maj_ver = resp->fw_major;
+		hw->fw_min_ver = resp->fw_minor;
+		hw->fw_patch = resp->fw_patch;
+		hw->fw_build = le32_to_cpu(resp->fw_build);
+		hw->api_branch = resp->api_branch;
+		hw->api_maj_ver = resp->api_major;
+		hw->api_min_ver = resp->api_minor;
+		hw->api_patch = resp->api_patch;
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_send_driver_ver
+ * @hw: pointer to the HW struct
+ * @dv: driver's major, minor version
+ * @cd: pointer to command details structure or NULL
+ *
+ * Send the driver version (0x0002) to the firmware
+ */
+enum ice_status
+ice_aq_send_driver_ver(struct ice_hw *hw, struct ice_driver_ver *dv,
+		       struct ice_sq_cd *cd)
+{
+	struct ice_aqc_driver_ver *cmd;
+	struct ice_aq_desc desc;
+	u16 len;
+
+	cmd = &desc.params.driver_ver;
+
+	if (!dv)
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_driver_ver);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	cmd->major_ver = dv->major_ver;
+	cmd->minor_ver = dv->minor_ver;
+	cmd->build_ver = dv->build_ver;
+	cmd->subbuild_ver = dv->subbuild_ver;
+
+	len = 0;
+	while (len < sizeof(dv->driver_string) &&
+	       isascii(dv->driver_string[len]) && dv->driver_string[len])
+		len++;
+
+	return ice_aq_send_cmd(hw, &desc, dv->driver_string, len, cd);
+}
+
+/**
+ * ice_aq_q_shutdown
+ * @hw: pointer to the HW struct
+ * @unloading: is the driver unloading itself
+ *
+ * Tell the Firmware that we're shutting down the AdminQ and whether
+ * or not the driver is unloading as well (0x0003).
+ */
+enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading)
+{
+	struct ice_aqc_q_shutdown *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.q_shutdown;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_q_shutdown);
+
+	if (unloading)
+		cmd->driver_unloading = ICE_AQC_DRIVER_UNLOADING;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_req_res
+ * @hw: pointer to the HW struct
+ * @res: resource ID
+ * @access: access type
+ * @sdp_number: resource number
+ * @timeout: the maximum time in ms that the driver may hold the resource
+ * @cd: pointer to command details structure or NULL
+ *
+ * Requests common resource using the admin queue commands (0x0008).
+ * When attempting to acquire the Global Config Lock, the driver can
+ * learn of three states:
+ *  1) ICE_SUCCESS -        acquired lock, and can perform download package
+ *  2) ICE_ERR_AQ_ERROR -   did not get lock, driver should fail to load
+ *  3) ICE_ERR_AQ_NO_WORK - did not get lock, but another driver has
+ *                          successfully downloaded the package; the driver does
+ *                          not have to download the package and can continue
+ *                          loading
+ *
+ * Note that if the caller is in an acquire lock, perform action, release lock
+ * phase of operation, it is possible that the FW may detect a timeout and issue
+ * a CORER. In this case, the driver will receive a CORER interrupt and will
+ * have to determine its cause. The calling thread that is handling this flow
+ * will likely get an error propagated back to it indicating the Download
+ * Package, Update Package or the Release Resource AQ commands timed out.
+ */
+static enum ice_status
+ice_aq_req_res(struct ice_hw *hw, enum ice_aq_res_ids res,
+	       enum ice_aq_res_access_type access, u8 sdp_number, u32 *timeout,
+	       struct ice_sq_cd *cd)
+{
+	struct ice_aqc_req_res *cmd_resp;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd_resp = &desc.params.res_owner;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_req_res);
+
+	cmd_resp->res_id = cpu_to_le16(res);
+	cmd_resp->access_type = cpu_to_le16(access);
+	cmd_resp->res_number = cpu_to_le32(sdp_number);
+	cmd_resp->timeout = cpu_to_le32(*timeout);
+	*timeout = 0;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+
+	/* The completion specifies the maximum time in ms that the driver
+	 * may hold the resource in the Timeout field.
+	 */
+
+	/* Global config lock response utilizes an additional status field.
+	 *
+	 * If the Global config lock resource is held by some other driver, the
+	 * command completes with ICE_AQ_RES_GLBL_IN_PROG in the status field
+	 * and the timeout field indicates the maximum time the current owner
+	 * of the resource has to free it.
+	 */
+	if (res == ICE_GLOBAL_CFG_LOCK_RES_ID) {
+		if (le16_to_cpu(cmd_resp->status) == ICE_AQ_RES_GLBL_SUCCESS) {
+			*timeout = le32_to_cpu(cmd_resp->timeout);
+			return 0;
+		} else if (le16_to_cpu(cmd_resp->status) ==
+			   ICE_AQ_RES_GLBL_IN_PROG) {
+			*timeout = le32_to_cpu(cmd_resp->timeout);
+			return ICE_ERR_AQ_ERROR;
+		} else if (le16_to_cpu(cmd_resp->status) ==
+			   ICE_AQ_RES_GLBL_DONE) {
+			return ICE_ERR_AQ_NO_WORK;
+		}
+
+		/* invalid FW response, force a timeout immediately */
+		*timeout = 0;
+		return ICE_ERR_AQ_ERROR;
+	}
+
+	/* If the resource is held by some other driver, the command completes
+	 * with a busy return value and the timeout field indicates the maximum
+	 * time the current owner of the resource has to free it.
+	 */
+	if (!status || hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY)
+		*timeout = le32_to_cpu(cmd_resp->timeout);
+
+	return status;
+}
+
+/**
+ * ice_aq_release_res
+ * @hw: pointer to the HW struct
+ * @res: resource ID
+ * @sdp_number: resource number
+ * @cd: pointer to command details structure or NULL
+ *
+ * release common resource using the admin queue commands (0x0009)
+ */
+static enum ice_status
+ice_aq_release_res(struct ice_hw *hw, enum ice_aq_res_ids res, u8 sdp_number,
+		   struct ice_sq_cd *cd)
+{
+	struct ice_aqc_req_res *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.res_owner;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_release_res);
+
+	cmd->res_id = cpu_to_le16(res);
+	cmd->res_number = cpu_to_le32(sdp_number);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_acquire_res
+ * @hw: pointer to the HW structure
+ * @res: resource ID
+ * @access: access type (read or write)
+ * @timeout: timeout in milliseconds
+ *
+ * This function will attempt to acquire the ownership of a resource.
+ */
+enum ice_status
+ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
+		enum ice_aq_res_access_type access, u32 timeout)
+{
+#define ICE_RES_POLLING_DELAY_MS	10
+	u32 delay = ICE_RES_POLLING_DELAY_MS;
+	u32 time_left = timeout;
+	enum ice_status status;
+
+	status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL);
+
+	/* A return code of ICE_ERR_AQ_NO_WORK means that another driver has
+	 * previously acquired the resource and performed any necessary updates;
+	 * in this case the caller does not obtain the resource and has no
+	 * further work to do.
+	 */
+	if (status == ICE_ERR_AQ_NO_WORK)
+		goto ice_acquire_res_exit;
+
+	if (status)
+		ice_debug(hw, ICE_DBG_RES,
+			  "resource %d acquire type %d failed.\n", res, access);
+
+	/* If necessary, poll until the current lock owner timeouts */
+	timeout = time_left;
+	while (status && timeout && time_left) {
+		mdelay(delay);
+		timeout = (timeout > delay) ? timeout - delay : 0;
+		status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL);
+
+		if (status == ICE_ERR_AQ_NO_WORK)
+			/* lock free, but no work to do */
+			break;
+
+		if (!status)
+			/* lock acquired */
+			break;
+	}
+	if (status && status != ICE_ERR_AQ_NO_WORK)
+		ice_debug(hw, ICE_DBG_RES, "resource acquire timed out.\n");
+
+ice_acquire_res_exit:
+	if (status == ICE_ERR_AQ_NO_WORK) {
+		if (access == ICE_RES_WRITE)
+			ice_debug(hw, ICE_DBG_RES,
+				  "resource indicates no work to do.\n");
+		else
+			ice_debug(hw, ICE_DBG_RES,
+				  "Warning: ICE_ERR_AQ_NO_WORK not expected\n");
+	}
+	return status;
+}
+
+/**
+ * ice_release_res
+ * @hw: pointer to the HW structure
+ * @res: resource ID
+ *
+ * This function will release a resource using the proper Admin Command.
+ */
+void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res)
+{
+	enum ice_status status;
+	u32 total_delay = 0;
+
+	status = ice_aq_release_res(hw, res, 0, NULL);
+
+	/* there are some rare cases when trying to release the resource
+	 * results in an admin queue timeout, so handle them correctly
+	 */
+	while ((status == ICE_ERR_AQ_TIMEOUT) &&
+	       (total_delay < hw->adminq.sq_cmd_timeout)) {
+		mdelay(1);
+		status = ice_aq_release_res(hw, res, 0, NULL);
+		total_delay++;
+	}
+}
+
+/**
+ * ice_aq_alloc_free_res - command to allocate/free resources
+ * @hw: pointer to the HW struct
+ * @num_entries: number of resource entries in buffer
+ * @buf: Indirect buffer to hold data parameters and response
+ * @buf_size: size of buffer for indirect commands
+ * @opc: pass in the command opcode
+ * @cd: pointer to command details structure or NULL
+ *
+ * Helper function to allocate/free resources using the admin queue commands
+ */
+enum ice_status
+ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries,
+		      struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size,
+		      enum ice_adminq_opc opc, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_alloc_free_res_cmd *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.sw_res_ctrl;
+
+	if (!buf)
+		return ICE_ERR_PARAM;
+
+	if (buf_size < (num_entries * sizeof(buf->elem[0])))
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, opc);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	cmd->num_entries = cpu_to_le16(num_entries);
+
+	return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+}
+
+/**
+ * ice_alloc_hw_res - allocate resource
+ * @hw: pointer to the HW struct
+ * @type: type of resource
+ * @num: number of resources to allocate
+ * @btm: allocate from bottom
+ * @res: pointer to array that will receive the resources
+ */
+enum ice_status
+ice_alloc_hw_res(struct ice_hw *hw, u16 type, u16 num, bool btm, u16 *res)
+{
+	struct ice_aqc_alloc_free_res_elem *buf;
+	enum ice_status status;
+	u16 buf_len;
+
+	buf_len = struct_size(buf, elem, num);
+	buf = kzalloc(buf_len, GFP_KERNEL);
+	if (!buf)
+		return ICE_ERR_NO_MEMORY;
+
+	/* Prepare buffer to allocate resource. */
+	buf->num_elems = cpu_to_le16(num);
+	buf->res_type = cpu_to_le16(type | ICE_AQC_RES_TYPE_FLAG_DEDICATED |
+				    ICE_AQC_RES_TYPE_FLAG_IGNORE_INDEX);
+	if (btm)
+		buf->res_type |= cpu_to_le16(ICE_AQC_RES_TYPE_FLAG_SCAN_BOTTOM);
+
+	status = ice_aq_alloc_free_res(hw, 1, buf, buf_len,
+				       ice_aqc_opc_alloc_res, NULL);
+	if (status)
+		goto ice_alloc_res_exit;
+
+	memcpy(res, buf->elem, sizeof(*buf->elem) * num);
+
+ice_alloc_res_exit:
+	kfree(buf);
+	return status;
+}
+
+/**
+ * ice_free_hw_res - free allocated HW resource
+ * @hw: pointer to the HW struct
+ * @type: type of resource to free
+ * @num: number of resources
+ * @res: pointer to array that contains the resources to free
+ */
+enum ice_status ice_free_hw_res(struct ice_hw *hw, u16 type, u16 num, u16 *res)
+{
+	struct ice_aqc_alloc_free_res_elem *buf;
+	enum ice_status status;
+	u16 buf_len;
+
+	buf_len = struct_size(buf, elem, num);
+	buf = kzalloc(buf_len, GFP_KERNEL);
+	if (!buf)
+		return ICE_ERR_NO_MEMORY;
+
+	/* Prepare buffer to free resource. */
+	buf->num_elems = cpu_to_le16(num);
+	buf->res_type = cpu_to_le16(type);
+	memcpy(buf->elem, res, sizeof(*buf->elem) * num);
+
+	status = ice_aq_alloc_free_res(hw, num, buf, buf_len,
+				       ice_aqc_opc_free_res, NULL);
+	if (status)
+		ice_debug(hw, ICE_DBG_SW, "CQ CMD Buffer:\n");
+
+	kfree(buf);
+	return status;
+}
+
+/**
+ * ice_get_num_per_func - determine number of resources per PF
+ * @hw: pointer to the HW structure
+ * @max: value to be evenly split between each PF
+ *
+ * Determine the number of valid functions by going through the bitmap returned
+ * from parsing capabilities and use this to calculate the number of resources
+ * per PF based on the max value passed in.
+ */
+static u32 ice_get_num_per_func(struct ice_hw *hw, u32 max)
+{
+	u8 funcs;
+
+#define ICE_CAPS_VALID_FUNCS_M	0xFF
+	funcs = hweight8(hw->dev_caps.common_cap.valid_functions &
+			 ICE_CAPS_VALID_FUNCS_M);
+
+	if (!funcs)
+		return 0;
+
+	return max / funcs;
+}
+
+/**
+ * ice_parse_common_caps - parse common device/function capabilities
+ * @hw: pointer to the HW struct
+ * @caps: pointer to common capabilities structure
+ * @elem: the capability element to parse
+ * @prefix: message prefix for tracing capabilities
+ *
+ * Given a capability element, extract relevant details into the common
+ * capability structure.
+ *
+ * Returns: true if the capability matches one of the common capability ids,
+ * false otherwise.
+ */
+static bool
+ice_parse_common_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps,
+		      struct ice_aqc_list_caps_elem *elem, const char *prefix)
+{
+	u32 logical_id = le32_to_cpu(elem->logical_id);
+	u32 phys_id = le32_to_cpu(elem->phys_id);
+	u32 number = le32_to_cpu(elem->number);
+	u16 cap = le16_to_cpu(elem->cap);
+	bool found = true;
+
+	switch (cap) {
+	case ICE_AQC_CAPS_VALID_FUNCTIONS:
+		caps->valid_functions = number;
+		ice_debug(hw, ICE_DBG_INIT,
+			  "%s: valid_functions (bitmap) = %d\n", prefix,
+			  caps->valid_functions);
+		break;
+	case ICE_AQC_CAPS_SRIOV:
+		caps->sr_iov_1_1 = (number == 1);
+		ice_debug(hw, ICE_DBG_INIT,
+			  "%s: sr_iov_1_1 = %d\n", prefix,
+			  caps->sr_iov_1_1);
+		break;
+	case ICE_AQC_CAPS_DCB:
+		caps->dcb = (number == 1);
+		caps->active_tc_bitmap = logical_id;
+		caps->maxtc = phys_id;
+		ice_debug(hw, ICE_DBG_INIT,
+			  "%s: dcb = %d\n", prefix, caps->dcb);
+		ice_debug(hw, ICE_DBG_INIT,
+			  "%s: active_tc_bitmap = %d\n", prefix,
+			  caps->active_tc_bitmap);
+		ice_debug(hw, ICE_DBG_INIT,
+			  "%s: maxtc = %d\n", prefix, caps->maxtc);
+		break;
+	case ICE_AQC_CAPS_RSS:
+		caps->rss_table_size = number;
+		caps->rss_table_entry_width = logical_id;
+		ice_debug(hw, ICE_DBG_INIT,
+			  "%s: rss_table_size = %d\n", prefix,
+			  caps->rss_table_size);
+		ice_debug(hw, ICE_DBG_INIT,
+			  "%s: rss_table_entry_width = %d\n", prefix,
+			  caps->rss_table_entry_width);
+		break;
+	case ICE_AQC_CAPS_RXQS:
+		caps->num_rxq = number;
+		caps->rxq_first_id = phys_id;
+		ice_debug(hw, ICE_DBG_INIT,
+			  "%s: num_rxq = %d\n", prefix,
+			  caps->num_rxq);
+		ice_debug(hw, ICE_DBG_INIT,
+			  "%s: rxq_first_id = %d\n", prefix,
+			  caps->rxq_first_id);
+		break;
+	case ICE_AQC_CAPS_TXQS:
+		caps->num_txq = number;
+		caps->txq_first_id = phys_id;
+		ice_debug(hw, ICE_DBG_INIT,
+			  "%s: num_txq = %d\n", prefix,
+			  caps->num_txq);
+		ice_debug(hw, ICE_DBG_INIT,
+			  "%s: txq_first_id = %d\n", prefix,
+			  caps->txq_first_id);
+		break;
+	case ICE_AQC_CAPS_MSIX:
+		caps->num_msix_vectors = number;
+		caps->msix_vector_first_id = phys_id;
+		ice_debug(hw, ICE_DBG_INIT,
+			  "%s: num_msix_vectors = %d\n", prefix,
+			  caps->num_msix_vectors);
+		ice_debug(hw, ICE_DBG_INIT,
+			  "%s: msix_vector_first_id = %d\n", prefix,
+			  caps->msix_vector_first_id);
+		break;
+	case ICE_AQC_CAPS_PENDING_NVM_VER:
+		caps->nvm_update_pending_nvm = true;
+		ice_debug(hw, ICE_DBG_INIT, "%s: update_pending_nvm\n", prefix);
+		break;
+	case ICE_AQC_CAPS_PENDING_OROM_VER:
+		caps->nvm_update_pending_orom = true;
+		ice_debug(hw, ICE_DBG_INIT, "%s: update_pending_orom\n", prefix);
+		break;
+	case ICE_AQC_CAPS_PENDING_NET_VER:
+		caps->nvm_update_pending_netlist = true;
+		ice_debug(hw, ICE_DBG_INIT, "%s: update_pending_netlist\n", prefix);
+		break;
+	case ICE_AQC_CAPS_NVM_MGMT:
+		caps->nvm_unified_update =
+			(number & ICE_NVM_MGMT_UNIFIED_UPD_SUPPORT) ?
+			true : false;
+		ice_debug(hw, ICE_DBG_INIT, "%s: nvm_unified_update = %d\n", prefix,
+			  caps->nvm_unified_update);
+		break;
+	case ICE_AQC_CAPS_MAX_MTU:
+		caps->max_mtu = number;
+		ice_debug(hw, ICE_DBG_INIT, "%s: max_mtu = %d\n",
+			  prefix, caps->max_mtu);
+		break;
+	default:
+		/* Not one of the recognized common capabilities */
+		found = false;
+	}
+
+	return found;
+}
+
+/**
+ * ice_recalc_port_limited_caps - Recalculate port limited capabilities
+ * @hw: pointer to the HW structure
+ * @caps: pointer to capabilities structure to fix
+ *
+ * Re-calculate the capabilities that are dependent on the number of physical
+ * ports; i.e. some features are not supported or function differently on
+ * devices with more than 4 ports.
+ */
+static void
+ice_recalc_port_limited_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps)
+{
+	/* This assumes device capabilities are always scanned before function
+	 * capabilities during the initialization flow.
+	 */
+	if (hw->dev_caps.num_funcs > 4) {
+		/* Max 4 TCs per port */
+		caps->maxtc = 4;
+		ice_debug(hw, ICE_DBG_INIT,
+			  "reducing maxtc to %d (based on #ports)\n",
+			  caps->maxtc);
+	}
+}
+
+/**
+ * ice_parse_vf_func_caps - Parse ICE_AQC_CAPS_VF function caps
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ * @cap: pointer to the capability element to parse
+ *
+ * Extract function capabilities for ICE_AQC_CAPS_VF.
+ */
+static void
+ice_parse_vf_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
+		       struct ice_aqc_list_caps_elem *cap)
+{
+	u32 logical_id = le32_to_cpu(cap->logical_id);
+	u32 number = le32_to_cpu(cap->number);
+
+	func_p->num_allocd_vfs = number;
+	func_p->vf_base_id = logical_id;
+	ice_debug(hw, ICE_DBG_INIT, "func caps: num_allocd_vfs = %d\n",
+		  func_p->num_allocd_vfs);
+	ice_debug(hw, ICE_DBG_INIT, "func caps: vf_base_id = %d\n",
+		  func_p->vf_base_id);
+}
+
+/**
+ * ice_parse_vsi_func_caps - Parse ICE_AQC_CAPS_VSI function caps
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ * @cap: pointer to the capability element to parse
+ *
+ * Extract function capabilities for ICE_AQC_CAPS_VSI.
+ */
+static void
+ice_parse_vsi_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
+			struct ice_aqc_list_caps_elem *cap)
+{
+	func_p->guar_num_vsi = ice_get_num_per_func(hw, ICE_MAX_VSI);
+	ice_debug(hw, ICE_DBG_INIT, "func caps: guar_num_vsi (fw) = %d\n",
+		  le32_to_cpu(cap->number));
+	ice_debug(hw, ICE_DBG_INIT, "func caps: guar_num_vsi = %d\n",
+		  func_p->guar_num_vsi);
+}
+
+/**
+ * ice_parse_fdir_func_caps - Parse ICE_AQC_CAPS_FD function caps
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ *
+ * Extract function capabilities for ICE_AQC_CAPS_FD.
+ */
+static void
+ice_parse_fdir_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p)
+{
+	u32 reg_val, val;
+
+	reg_val = rd32(hw, GLQF_FD_SIZE);
+	val = (reg_val & GLQF_FD_SIZE_FD_GSIZE_M) >>
+		GLQF_FD_SIZE_FD_GSIZE_S;
+	func_p->fd_fltr_guar =
+		ice_get_num_per_func(hw, val);
+	val = (reg_val & GLQF_FD_SIZE_FD_BSIZE_M) >>
+		GLQF_FD_SIZE_FD_BSIZE_S;
+	func_p->fd_fltr_best_effort = val;
+
+	ice_debug(hw, ICE_DBG_INIT,
+		  "func caps: fd_fltr_guar = %d\n",
+		  func_p->fd_fltr_guar);
+	ice_debug(hw, ICE_DBG_INIT,
+		  "func caps: fd_fltr_best_effort = %d\n",
+		  func_p->fd_fltr_best_effort);
+}
+
+/**
+ * ice_parse_func_caps - Parse function capabilities
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ * @buf: buffer containing the function capability records
+ * @cap_count: the number of capabilities
+ *
+ * Helper function to parse function (0x000A) capabilities list. For
+ * capabilities shared between device and function, this relies on
+ * ice_parse_common_caps.
+ *
+ * Loop through the list of provided capabilities and extract the relevant
+ * data into the function capabilities structured.
+ */
+static void
+ice_parse_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
+		    void *buf, u32 cap_count)
+{
+	struct ice_aqc_list_caps_elem *cap_resp;
+	u32 i;
+
+	cap_resp = (struct ice_aqc_list_caps_elem *)buf;
+
+	memset(func_p, 0, sizeof(*func_p));
+
+	for (i = 0; i < cap_count; i++) {
+		u16 cap = le16_to_cpu(cap_resp[i].cap);
+		bool found;
+
+		found = ice_parse_common_caps(hw, &func_p->common_cap,
+					      &cap_resp[i], "func caps");
+
+		switch (cap) {
+		case ICE_AQC_CAPS_VF:
+			ice_parse_vf_func_caps(hw, func_p, &cap_resp[i]);
+			break;
+		case ICE_AQC_CAPS_VSI:
+			ice_parse_vsi_func_caps(hw, func_p, &cap_resp[i]);
+			break;
+		case ICE_AQC_CAPS_FD:
+			ice_parse_fdir_func_caps(hw, func_p);
+			break;
+		default:
+			/* Don't list common capabilities as unknown */
+			if (!found)
+				ice_debug(hw, ICE_DBG_INIT,
+					  "func caps: unknown capability[%d]: 0x%x\n",
+					  i, cap);
+			break;
+		}
+	}
+
+	ice_recalc_port_limited_caps(hw, &func_p->common_cap);
+}
+
+/**
+ * ice_parse_valid_functions_cap - Parse ICE_AQC_CAPS_VALID_FUNCTIONS caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_VALID_FUNCTIONS for device capabilities.
+ */
+static void
+ice_parse_valid_functions_cap(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+			      struct ice_aqc_list_caps_elem *cap)
+{
+	u32 number = le32_to_cpu(cap->number);
+
+	dev_p->num_funcs = hweight32(number);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: num_funcs = %d\n",
+		  dev_p->num_funcs);
+}
+
+/**
+ * ice_parse_vf_dev_caps - Parse ICE_AQC_CAPS_VF device caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_VF for device capabilities.
+ */
+static void
+ice_parse_vf_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+		      struct ice_aqc_list_caps_elem *cap)
+{
+	u32 number = le32_to_cpu(cap->number);
+
+	dev_p->num_vfs_exposed = number;
+	ice_debug(hw, ICE_DBG_INIT, "dev_caps: num_vfs_exposed = %d\n",
+		  dev_p->num_vfs_exposed);
+}
+
+/**
+ * ice_parse_vsi_dev_caps - Parse ICE_AQC_CAPS_VSI device caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_VSI for device capabilities.
+ */
+static void
+ice_parse_vsi_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+		       struct ice_aqc_list_caps_elem *cap)
+{
+	u32 number = le32_to_cpu(cap->number);
+
+	dev_p->num_vsi_allocd_to_host = number;
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: num_vsi_allocd_to_host = %d\n",
+		  dev_p->num_vsi_allocd_to_host);
+}
+
+/**
+ * ice_parse_fdir_dev_caps - Parse ICE_AQC_CAPS_FD device caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_FD for device capabilities.
+ */
+static void
+ice_parse_fdir_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+			struct ice_aqc_list_caps_elem *cap)
+{
+	u32 number = le32_to_cpu(cap->number);
+
+	dev_p->num_flow_director_fltr = number;
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: num_flow_director_fltr = %d\n",
+		  dev_p->num_flow_director_fltr);
+}
+
+/**
+ * ice_parse_dev_caps - Parse device capabilities
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @buf: buffer containing the device capability records
+ * @cap_count: the number of capabilities
+ *
+ * Helper device to parse device (0x000B) capabilities list. For
+ * capabilities shared between device and function, this relies on
+ * ice_parse_common_caps.
+ *
+ * Loop through the list of provided capabilities and extract the relevant
+ * data into the device capabilities structured.
+ */
+static void
+ice_parse_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+		   void *buf, u32 cap_count)
+{
+	struct ice_aqc_list_caps_elem *cap_resp;
+	u32 i;
+
+	cap_resp = (struct ice_aqc_list_caps_elem *)buf;
+
+	memset(dev_p, 0, sizeof(*dev_p));
+
+	for (i = 0; i < cap_count; i++) {
+		u16 cap = le16_to_cpu(cap_resp[i].cap);
+		bool found;
+
+		found = ice_parse_common_caps(hw, &dev_p->common_cap,
+					      &cap_resp[i], "dev caps");
+
+		switch (cap) {
+		case ICE_AQC_CAPS_VALID_FUNCTIONS:
+			ice_parse_valid_functions_cap(hw, dev_p, &cap_resp[i]);
+			break;
+		case ICE_AQC_CAPS_VF:
+			ice_parse_vf_dev_caps(hw, dev_p, &cap_resp[i]);
+			break;
+		case ICE_AQC_CAPS_VSI:
+			ice_parse_vsi_dev_caps(hw, dev_p, &cap_resp[i]);
+			break;
+		case  ICE_AQC_CAPS_FD:
+			ice_parse_fdir_dev_caps(hw, dev_p, &cap_resp[i]);
+			break;
+		default:
+			/* Don't list common capabilities as unknown */
+			if (!found)
+				ice_debug(hw, ICE_DBG_INIT,
+					  "dev caps: unknown capability[%d]: 0x%x\n",
+					  i, cap);
+			break;
+		}
+	}
+
+	ice_recalc_port_limited_caps(hw, &dev_p->common_cap);
+}
+
+/**
+ * ice_aq_list_caps - query function/device capabilities
+ * @hw: pointer to the HW struct
+ * @buf: a buffer to hold the capabilities
+ * @buf_size: size of the buffer
+ * @cap_count: if not NULL, set to the number of capabilities reported
+ * @opc: capabilities type to discover, device or function
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get the function (0x000A) or device (0x000B) capabilities description from
+ * firmware and store it in the buffer.
+ *
+ * If the cap_count pointer is not NULL, then it is set to the number of
+ * capabilities firmware will report. Note that if the buffer size is too
+ * small, it is possible the command will return ICE_AQ_ERR_ENOMEM. The
+ * cap_count will still be updated in this case. It is recommended that the
+ * buffer size be set to ICE_AQ_MAX_BUF_LEN (the largest possible buffer that
+ * firmware could return) to avoid this.
+ */
+enum ice_status
+ice_aq_list_caps(struct ice_hw *hw, void *buf, u16 buf_size, u32 *cap_count,
+		 enum ice_adminq_opc opc, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_list_caps *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.get_cap;
+
+	if (opc != ice_aqc_opc_list_func_caps &&
+	    opc != ice_aqc_opc_list_dev_caps)
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, opc);
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+
+	if (cap_count)
+		*cap_count = le32_to_cpu(cmd->count);
+
+	return status;
+}
+
+/**
+ * ice_discover_dev_caps - Read and extract device capabilities
+ * @hw: pointer to the hardware structure
+ * @dev_caps: pointer to device capabilities structure
+ *
+ * Read the device capabilities and extract them into the dev_caps structure
+ * for later use.
+ */
+enum ice_status
+ice_discover_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_caps)
+{
+	enum ice_status status;
+	u32 cap_count = 0;
+	void *cbuf;
+
+	cbuf = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
+	if (!cbuf)
+		return ICE_ERR_NO_MEMORY;
+
+	/* Although the driver doesn't know the number of capabilities the
+	 * device will return, we can simply send a 4KB buffer, the maximum
+	 * possible size that firmware can return.
+	 */
+	cap_count = ICE_AQ_MAX_BUF_LEN / sizeof(struct ice_aqc_list_caps_elem);
+
+	status = ice_aq_list_caps(hw, cbuf, ICE_AQ_MAX_BUF_LEN, &cap_count,
+				  ice_aqc_opc_list_dev_caps, NULL);
+	if (!status)
+		ice_parse_dev_caps(hw, dev_caps, cbuf, cap_count);
+	kfree(cbuf);
+
+	return status;
+}
+
+/**
+ * ice_discover_func_caps - Read and extract function capabilities
+ * @hw: pointer to the hardware structure
+ * @func_caps: pointer to function capabilities structure
+ *
+ * Read the function capabilities and extract them into the func_caps structure
+ * for later use.
+ */
+static enum ice_status
+ice_discover_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_caps)
+{
+	enum ice_status status;
+	u32 cap_count = 0;
+	void *cbuf;
+
+	cbuf = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
+	if (!cbuf)
+		return ICE_ERR_NO_MEMORY;
+
+	/* Although the driver doesn't know the number of capabilities the
+	 * device will return, we can simply send a 4KB buffer, the maximum
+	 * possible size that firmware can return.
+	 */
+	cap_count = ICE_AQ_MAX_BUF_LEN / sizeof(struct ice_aqc_list_caps_elem);
+
+	status = ice_aq_list_caps(hw, cbuf, ICE_AQ_MAX_BUF_LEN, &cap_count,
+				  ice_aqc_opc_list_func_caps, NULL);
+	if (!status)
+		ice_parse_func_caps(hw, func_caps, cbuf, cap_count);
+	kfree(cbuf);
+
+	return status;
+}
+
+/**
+ * ice_set_safe_mode_caps - Override dev/func capabilities when in safe mode
+ * @hw: pointer to the hardware structure
+ */
+void ice_set_safe_mode_caps(struct ice_hw *hw)
+{
+	struct ice_hw_func_caps *func_caps = &hw->func_caps;
+	struct ice_hw_dev_caps *dev_caps = &hw->dev_caps;
+	struct ice_hw_common_caps cached_caps;
+	u32 num_funcs;
+
+	/* cache some func_caps values that should be restored after memset */
+	cached_caps = func_caps->common_cap;
+
+	/* unset func capabilities */
+	memset(func_caps, 0, sizeof(*func_caps));
+
+#define ICE_RESTORE_FUNC_CAP(name) \
+	func_caps->common_cap.name = cached_caps.name
+
+	/* restore cached values */
+	ICE_RESTORE_FUNC_CAP(valid_functions);
+	ICE_RESTORE_FUNC_CAP(txq_first_id);
+	ICE_RESTORE_FUNC_CAP(rxq_first_id);
+	ICE_RESTORE_FUNC_CAP(msix_vector_first_id);
+	ICE_RESTORE_FUNC_CAP(max_mtu);
+	ICE_RESTORE_FUNC_CAP(nvm_unified_update);
+	ICE_RESTORE_FUNC_CAP(nvm_update_pending_nvm);
+	ICE_RESTORE_FUNC_CAP(nvm_update_pending_orom);
+	ICE_RESTORE_FUNC_CAP(nvm_update_pending_netlist);
+
+	/* one Tx and one Rx queue in safe mode */
+	func_caps->common_cap.num_rxq = 1;
+	func_caps->common_cap.num_txq = 1;
+
+	/* two MSIX vectors, one for traffic and one for misc causes */
+	func_caps->common_cap.num_msix_vectors = 2;
+	func_caps->guar_num_vsi = 1;
+
+	/* cache some dev_caps values that should be restored after memset */
+	cached_caps = dev_caps->common_cap;
+	num_funcs = dev_caps->num_funcs;
+
+	/* unset dev capabilities */
+	memset(dev_caps, 0, sizeof(*dev_caps));
+
+#define ICE_RESTORE_DEV_CAP(name) \
+	dev_caps->common_cap.name = cached_caps.name
+
+	/* restore cached values */
+	ICE_RESTORE_DEV_CAP(valid_functions);
+	ICE_RESTORE_DEV_CAP(txq_first_id);
+	ICE_RESTORE_DEV_CAP(rxq_first_id);
+	ICE_RESTORE_DEV_CAP(msix_vector_first_id);
+	ICE_RESTORE_DEV_CAP(max_mtu);
+	ICE_RESTORE_DEV_CAP(nvm_unified_update);
+	ICE_RESTORE_DEV_CAP(nvm_update_pending_nvm);
+	ICE_RESTORE_DEV_CAP(nvm_update_pending_orom);
+	ICE_RESTORE_DEV_CAP(nvm_update_pending_netlist);
+	dev_caps->num_funcs = num_funcs;
+
+	/* one Tx and one Rx queue per function in safe mode */
+	dev_caps->common_cap.num_rxq = num_funcs;
+	dev_caps->common_cap.num_txq = num_funcs;
+
+	/* two MSIX vectors per function */
+	dev_caps->common_cap.num_msix_vectors = 2 * num_funcs;
+}
+
+/**
+ * ice_get_caps - get info about the HW
+ * @hw: pointer to the hardware structure
+ */
+enum ice_status ice_get_caps(struct ice_hw *hw)
+{
+	enum ice_status status;
+
+	status = ice_discover_dev_caps(hw, &hw->dev_caps);
+	if (status)
+		return status;
+
+	return ice_discover_func_caps(hw, &hw->func_caps);
+}
+
+/**
+ * ice_aq_manage_mac_write - manage MAC address write command
+ * @hw: pointer to the HW struct
+ * @mac_addr: MAC address to be written as LAA/LAA+WoL/Port address
+ * @flags: flags to control write behavior
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function is used to write MAC address to the NVM (0x0108).
+ */
+enum ice_status
+ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags,
+			struct ice_sq_cd *cd)
+{
+	struct ice_aqc_manage_mac_write *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.mac_write;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_manage_mac_write);
+
+	cmd->flags = flags;
+	ether_addr_copy(cmd->mac_addr, mac_addr);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_clear_pxe_mode
+ * @hw: pointer to the HW struct
+ *
+ * Tell the firmware that the driver is taking over from PXE (0x0110).
+ */
+static enum ice_status ice_aq_clear_pxe_mode(struct ice_hw *hw)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_clear_pxe_mode);
+	desc.params.clear_pxe.rx_cnt = ICE_AQC_CLEAR_PXE_RX_CNT;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_clear_pxe_mode - clear pxe operations mode
+ * @hw: pointer to the HW struct
+ *
+ * Make sure all PXE mode settings are cleared, including things
+ * like descriptor fetch/write-back mode.
+ */
+void ice_clear_pxe_mode(struct ice_hw *hw)
+{
+	if (ice_check_sq_alive(hw, &hw->adminq))
+		ice_aq_clear_pxe_mode(hw);
+}
+
+/**
+ * ice_get_link_speed_based_on_phy_type - returns link speed
+ * @phy_type_low: lower part of phy_type
+ * @phy_type_high: higher part of phy_type
+ *
+ * This helper function will convert an entry in PHY type structure
+ * [phy_type_low, phy_type_high] to its corresponding link speed.
+ * Note: In the structure of [phy_type_low, phy_type_high], there should
+ * be one bit set, as this function will convert one PHY type to its
+ * speed.
+ * If no bit gets set, ICE_LINK_SPEED_UNKNOWN will be returned
+ * If more than one bit gets set, ICE_LINK_SPEED_UNKNOWN will be returned
+ */
+static u16
+ice_get_link_speed_based_on_phy_type(u64 phy_type_low, u64 phy_type_high)
+{
+	u16 speed_phy_type_high = ICE_AQ_LINK_SPEED_UNKNOWN;
+	u16 speed_phy_type_low = ICE_AQ_LINK_SPEED_UNKNOWN;
+
+	switch (phy_type_low) {
+	case ICE_PHY_TYPE_LOW_100BASE_TX:
+	case ICE_PHY_TYPE_LOW_100M_SGMII:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_100MB;
+		break;
+	case ICE_PHY_TYPE_LOW_1000BASE_T:
+	case ICE_PHY_TYPE_LOW_1000BASE_SX:
+	case ICE_PHY_TYPE_LOW_1000BASE_LX:
+	case ICE_PHY_TYPE_LOW_1000BASE_KX:
+	case ICE_PHY_TYPE_LOW_1G_SGMII:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_1000MB;
+		break;
+	case ICE_PHY_TYPE_LOW_2500BASE_T:
+	case ICE_PHY_TYPE_LOW_2500BASE_X:
+	case ICE_PHY_TYPE_LOW_2500BASE_KX:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_2500MB;
+		break;
+	case ICE_PHY_TYPE_LOW_5GBASE_T:
+	case ICE_PHY_TYPE_LOW_5GBASE_KR:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_5GB;
+		break;
+	case ICE_PHY_TYPE_LOW_10GBASE_T:
+	case ICE_PHY_TYPE_LOW_10G_SFI_DA:
+	case ICE_PHY_TYPE_LOW_10GBASE_SR:
+	case ICE_PHY_TYPE_LOW_10GBASE_LR:
+	case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
+	case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_10GB;
+		break;
+	case ICE_PHY_TYPE_LOW_25GBASE_T:
+	case ICE_PHY_TYPE_LOW_25GBASE_CR:
+	case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
+	case ICE_PHY_TYPE_LOW_25GBASE_CR1:
+	case ICE_PHY_TYPE_LOW_25GBASE_SR:
+	case ICE_PHY_TYPE_LOW_25GBASE_LR:
+	case ICE_PHY_TYPE_LOW_25GBASE_KR:
+	case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
+	case ICE_PHY_TYPE_LOW_25GBASE_KR1:
+	case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_25GB;
+		break;
+	case ICE_PHY_TYPE_LOW_40GBASE_CR4:
+	case ICE_PHY_TYPE_LOW_40GBASE_SR4:
+	case ICE_PHY_TYPE_LOW_40GBASE_LR4:
+	case ICE_PHY_TYPE_LOW_40GBASE_KR4:
+	case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_40G_XLAUI:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_40GB;
+		break;
+	case ICE_PHY_TYPE_LOW_50GBASE_CR2:
+	case ICE_PHY_TYPE_LOW_50GBASE_SR2:
+	case ICE_PHY_TYPE_LOW_50GBASE_LR2:
+	case ICE_PHY_TYPE_LOW_50GBASE_KR2:
+	case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_50G_LAUI2:
+	case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_50G_AUI2:
+	case ICE_PHY_TYPE_LOW_50GBASE_CP:
+	case ICE_PHY_TYPE_LOW_50GBASE_SR:
+	case ICE_PHY_TYPE_LOW_50GBASE_FR:
+	case ICE_PHY_TYPE_LOW_50GBASE_LR:
+	case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
+	case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_50G_AUI1:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_50GB;
+		break;
+	case ICE_PHY_TYPE_LOW_100GBASE_CR4:
+	case ICE_PHY_TYPE_LOW_100GBASE_SR4:
+	case ICE_PHY_TYPE_LOW_100GBASE_LR4:
+	case ICE_PHY_TYPE_LOW_100GBASE_KR4:
+	case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_100G_CAUI4:
+	case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_100G_AUI4:
+	case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
+	case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
+	case ICE_PHY_TYPE_LOW_100GBASE_CP2:
+	case ICE_PHY_TYPE_LOW_100GBASE_SR2:
+	case ICE_PHY_TYPE_LOW_100GBASE_DR:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_100GB;
+		break;
+	default:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_UNKNOWN;
+		break;
+	}
+
+	switch (phy_type_high) {
+	case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
+	case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
+	case ICE_PHY_TYPE_HIGH_100G_CAUI2:
+	case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
+	case ICE_PHY_TYPE_HIGH_100G_AUI2:
+		speed_phy_type_high = ICE_AQ_LINK_SPEED_100GB;
+		break;
+	default:
+		speed_phy_type_high = ICE_AQ_LINK_SPEED_UNKNOWN;
+		break;
+	}
+
+	if (speed_phy_type_low == ICE_AQ_LINK_SPEED_UNKNOWN &&
+	    speed_phy_type_high == ICE_AQ_LINK_SPEED_UNKNOWN)
+		return ICE_AQ_LINK_SPEED_UNKNOWN;
+	else if (speed_phy_type_low != ICE_AQ_LINK_SPEED_UNKNOWN &&
+		 speed_phy_type_high != ICE_AQ_LINK_SPEED_UNKNOWN)
+		return ICE_AQ_LINK_SPEED_UNKNOWN;
+	else if (speed_phy_type_low != ICE_AQ_LINK_SPEED_UNKNOWN &&
+		 speed_phy_type_high == ICE_AQ_LINK_SPEED_UNKNOWN)
+		return speed_phy_type_low;
+	else
+		return speed_phy_type_high;
+}
+
+/**
+ * ice_update_phy_type
+ * @phy_type_low: pointer to the lower part of phy_type
+ * @phy_type_high: pointer to the higher part of phy_type
+ * @link_speeds_bitmap: targeted link speeds bitmap
+ *
+ * Note: For the link_speeds_bitmap structure, you can check it at
+ * [ice_aqc_get_link_status->link_speed]. Caller can pass in
+ * link_speeds_bitmap include multiple speeds.
+ *
+ * Each entry in this [phy_type_low, phy_type_high] structure will
+ * present a certain link speed. This helper function will turn on bits
+ * in [phy_type_low, phy_type_high] structure based on the value of
+ * link_speeds_bitmap input parameter.
+ */
+void
+ice_update_phy_type(u64 *phy_type_low, u64 *phy_type_high,
+		    u16 link_speeds_bitmap)
+{
+	u64 pt_high;
+	u64 pt_low;
+	int index;
+	u16 speed;
+
+	/* We first check with low part of phy_type */
+	for (index = 0; index <= ICE_PHY_TYPE_LOW_MAX_INDEX; index++) {
+		pt_low = BIT_ULL(index);
+		speed = ice_get_link_speed_based_on_phy_type(pt_low, 0);
+
+		if (link_speeds_bitmap & speed)
+			*phy_type_low |= BIT_ULL(index);
+	}
+
+	/* We then check with high part of phy_type */
+	for (index = 0; index <= ICE_PHY_TYPE_HIGH_MAX_INDEX; index++) {
+		pt_high = BIT_ULL(index);
+		speed = ice_get_link_speed_based_on_phy_type(0, pt_high);
+
+		if (link_speeds_bitmap & speed)
+			*phy_type_high |= BIT_ULL(index);
+	}
+}
+
+/**
+ * ice_aq_set_phy_cfg
+ * @hw: pointer to the HW struct
+ * @pi: port info structure of the interested logical port
+ * @cfg: structure with PHY configuration data to be set
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set the various PHY configuration parameters supported on the Port.
+ * One or more of the Set PHY config parameters may be ignored in an MFP
+ * mode as the PF may not have the privilege to set some of the PHY Config
+ * parameters. This status will be indicated by the command response (0x0601).
+ */
+enum ice_status
+ice_aq_set_phy_cfg(struct ice_hw *hw, struct ice_port_info *pi,
+		   struct ice_aqc_set_phy_cfg_data *cfg, struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	if (!cfg)
+		return ICE_ERR_PARAM;
+
+	/* Ensure that only valid bits of cfg->caps can be turned on. */
+	if (cfg->caps & ~ICE_AQ_PHY_ENA_VALID_MASK) {
+		ice_debug(hw, ICE_DBG_PHY,
+			  "Invalid bit is set in ice_aqc_set_phy_cfg_data->caps : 0x%x\n",
+			  cfg->caps);
+
+		cfg->caps &= ICE_AQ_PHY_ENA_VALID_MASK;
+	}
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_phy_cfg);
+	desc.params.set_phy.lport_num = pi->lport;
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	ice_debug(hw, ICE_DBG_LINK, "set phy cfg\n");
+	ice_debug(hw, ICE_DBG_LINK, "	phy_type_low = 0x%llx\n",
+		  (unsigned long long)le64_to_cpu(cfg->phy_type_low));
+	ice_debug(hw, ICE_DBG_LINK, "	phy_type_high = 0x%llx\n",
+		  (unsigned long long)le64_to_cpu(cfg->phy_type_high));
+	ice_debug(hw, ICE_DBG_LINK, "	caps = 0x%x\n", cfg->caps);
+	ice_debug(hw, ICE_DBG_LINK, "	low_power_ctrl_an = 0x%x\n",
+		  cfg->low_power_ctrl_an);
+	ice_debug(hw, ICE_DBG_LINK, "	eee_cap = 0x%x\n", cfg->eee_cap);
+	ice_debug(hw, ICE_DBG_LINK, "	eeer_value = 0x%x\n", cfg->eeer_value);
+	ice_debug(hw, ICE_DBG_LINK, "	link_fec_opt = 0x%x\n",
+		  cfg->link_fec_opt);
+
+	status = ice_aq_send_cmd(hw, &desc, cfg, sizeof(*cfg), cd);
+	if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE)
+		status = 0;
+
+	if (!status)
+		pi->phy.curr_user_phy_cfg = *cfg;
+
+	return status;
+}
+
+/**
+ * ice_update_link_info - update status of the HW network link
+ * @pi: port info structure of the interested logical port
+ */
+enum ice_status ice_update_link_info(struct ice_port_info *pi)
+{
+	struct ice_link_status *li;
+	enum ice_status status;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	li = &pi->phy.link_info;
+
+	status = ice_aq_get_link_info(pi, true, NULL, NULL);
+	if (status)
+		return status;
+
+	if (li->link_info & ICE_AQ_MEDIA_AVAILABLE) {
+		struct ice_aqc_get_phy_caps_data *pcaps;
+		struct ice_hw *hw;
+
+		hw = pi->hw;
+		pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps),
+				     GFP_KERNEL);
+		if (!pcaps)
+			return ICE_ERR_NO_MEMORY;
+
+		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
+					     pcaps, NULL);
+
+		devm_kfree(ice_hw_to_dev(hw), pcaps);
+	}
+
+	return status;
+}
+
+/**
+ * ice_cache_phy_user_req
+ * @pi: port information structure
+ * @cache_data: PHY logging data
+ * @cache_mode: PHY logging mode
+ *
+ * Log the user request on (FC, FEC, SPEED) for later use.
+ */
+static void
+ice_cache_phy_user_req(struct ice_port_info *pi,
+		       struct ice_phy_cache_mode_data cache_data,
+		       enum ice_phy_cache_mode cache_mode)
+{
+	if (!pi)
+		return;
+
+	switch (cache_mode) {
+	case ICE_FC_MODE:
+		pi->phy.curr_user_fc_req = cache_data.data.curr_user_fc_req;
+		break;
+	case ICE_SPEED_MODE:
+		pi->phy.curr_user_speed_req =
+			cache_data.data.curr_user_speed_req;
+		break;
+	case ICE_FEC_MODE:
+		pi->phy.curr_user_fec_req = cache_data.data.curr_user_fec_req;
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * ice_caps_to_fc_mode
+ * @caps: PHY capabilities
+ *
+ * Convert PHY FC capabilities to ice FC mode
+ */
+enum ice_fc_mode ice_caps_to_fc_mode(u8 caps)
+{
+	if (caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE &&
+	    caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
+		return ICE_FC_FULL;
+
+	if (caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE)
+		return ICE_FC_TX_PAUSE;
+
+	if (caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
+		return ICE_FC_RX_PAUSE;
+
+	return ICE_FC_NONE;
+}
+
+/**
+ * ice_caps_to_fec_mode
+ * @caps: PHY capabilities
+ * @fec_options: Link FEC options
+ *
+ * Convert PHY FEC capabilities to ice FEC mode
+ */
+enum ice_fec_mode ice_caps_to_fec_mode(u8 caps, u8 fec_options)
+{
+	if (caps & ICE_AQC_PHY_EN_AUTO_FEC)
+		return ICE_FEC_AUTO;
+
+	if (fec_options & (ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN |
+			   ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
+			   ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN |
+			   ICE_AQC_PHY_FEC_25G_KR_REQ))
+		return ICE_FEC_BASER;
+
+	if (fec_options & (ICE_AQC_PHY_FEC_25G_RS_528_REQ |
+			   ICE_AQC_PHY_FEC_25G_RS_544_REQ |
+			   ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN))
+		return ICE_FEC_RS;
+
+	return ICE_FEC_NONE;
+}
+
+/**
+ * ice_cfg_phy_fc - Configure PHY FC data based on FC mode
+ * @pi: port information structure
+ * @cfg: PHY configuration data to set FC mode
+ * @req_mode: FC mode to configure
+ */
+enum ice_status
+ice_cfg_phy_fc(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
+	       enum ice_fc_mode req_mode)
+{
+	struct ice_phy_cache_mode_data cache_data;
+	u8 pause_mask = 0x0;
+
+	if (!pi || !cfg)
+		return ICE_ERR_BAD_PTR;
+
+	switch (req_mode) {
+	case ICE_FC_FULL:
+		pause_mask |= ICE_AQC_PHY_EN_TX_LINK_PAUSE;
+		pause_mask |= ICE_AQC_PHY_EN_RX_LINK_PAUSE;
+		break;
+	case ICE_FC_RX_PAUSE:
+		pause_mask |= ICE_AQC_PHY_EN_RX_LINK_PAUSE;
+		break;
+	case ICE_FC_TX_PAUSE:
+		pause_mask |= ICE_AQC_PHY_EN_TX_LINK_PAUSE;
+		break;
+	default:
+		break;
+	}
+
+	/* clear the old pause settings */
+	cfg->caps &= ~(ICE_AQC_PHY_EN_TX_LINK_PAUSE |
+		ICE_AQC_PHY_EN_RX_LINK_PAUSE);
+
+	/* set the new capabilities */
+	cfg->caps |= pause_mask;
+
+	/* Cache user FC request */
+	cache_data.data.curr_user_fc_req = req_mode;
+	ice_cache_phy_user_req(pi, cache_data, ICE_FC_MODE);
+
+	return 0;
+}
+
+/**
+ * ice_set_fc
+ * @pi: port information structure
+ * @aq_failures: pointer to status code, specific to ice_set_fc routine
+ * @ena_auto_link_update: enable automatic link update
+ *
+ * Set the requested flow control mode.
+ */
+enum ice_status
+ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
+{
+	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	enum ice_status status;
+	struct ice_hw *hw;
+
+	if (!pi || !aq_failures)
+		return ICE_ERR_BAD_PTR;
+
+	*aq_failures = 0;
+	hw = pi->hw;
+
+	pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return ICE_ERR_NO_MEMORY;
+
+	/* Get the current PHY config */
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
+				     pcaps, NULL);
+	if (status) {
+		*aq_failures = ICE_SET_FC_AQ_FAIL_GET;
+		goto out;
+	}
+
+	ice_copy_phy_caps_to_cfg(pi, pcaps, &cfg);
+
+	/* Configure the set PHY data */
+	status = ice_cfg_phy_fc(pi, &cfg, pi->fc.req_mode);
+	if (status)
+		goto out;
+
+	/* If the capabilities have changed, then set the new config */
+	if (cfg.caps != pcaps->caps) {
+		int retry_count, retry_max = 10;
+
+		/* Auto restart link so settings take effect */
+		if (ena_auto_link_update)
+			cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+
+		status = ice_aq_set_phy_cfg(hw, pi, &cfg, NULL);
+		if (status) {
+			*aq_failures = ICE_SET_FC_AQ_FAIL_SET;
+			goto out;
+		}
+
+		/* Update the link info
+		 * It sometimes takes a really long time for link to
+		 * come back from the atomic reset. Thus, we wait a
+		 * little bit.
+		 */
+		for (retry_count = 0; retry_count < retry_max; retry_count++) {
+			status = ice_update_link_info(pi);
+
+			if (!status)
+				break;
+
+			mdelay(100);
+		}
+
+		if (status)
+			*aq_failures = ICE_SET_FC_AQ_FAIL_UPDATE;
+	}
+
+out:
+	devm_kfree(ice_hw_to_dev(hw), pcaps);
+	return status;
+}
+
+/**
+ * ice_phy_caps_equals_cfg
+ * @phy_caps: PHY capabilities
+ * @phy_cfg: PHY configuration
+ *
+ * Helper function to determine if PHY capabilities matches PHY
+ * configuration
+ */
+bool
+ice_phy_caps_equals_cfg(struct ice_aqc_get_phy_caps_data *phy_caps,
+			struct ice_aqc_set_phy_cfg_data *phy_cfg)
+{
+	u8 caps_mask, cfg_mask;
+
+	if (!phy_caps || !phy_cfg)
+		return false;
+
+	/* These bits are not common between capabilities and configuration.
+	 * Do not use them to determine equality.
+	 */
+	caps_mask = ICE_AQC_PHY_CAPS_MASK & ~(ICE_AQC_PHY_AN_MODE |
+					      ICE_AQC_GET_PHY_EN_MOD_QUAL);
+	cfg_mask = ICE_AQ_PHY_ENA_VALID_MASK & ~ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+
+	if (phy_caps->phy_type_low != phy_cfg->phy_type_low ||
+	    phy_caps->phy_type_high != phy_cfg->phy_type_high ||
+	    ((phy_caps->caps & caps_mask) != (phy_cfg->caps & cfg_mask)) ||
+	    phy_caps->low_power_ctrl_an != phy_cfg->low_power_ctrl_an ||
+	    phy_caps->eee_cap != phy_cfg->eee_cap ||
+	    phy_caps->eeer_value != phy_cfg->eeer_value ||
+	    phy_caps->link_fec_options != phy_cfg->link_fec_opt)
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_copy_phy_caps_to_cfg - Copy PHY ability data to configuration data
+ * @pi: port information structure
+ * @caps: PHY ability structure to copy date from
+ * @cfg: PHY configuration structure to copy data to
+ *
+ * Helper function to copy AQC PHY get ability data to PHY set configuration
+ * data structure
+ */
+void
+ice_copy_phy_caps_to_cfg(struct ice_port_info *pi,
+			 struct ice_aqc_get_phy_caps_data *caps,
+			 struct ice_aqc_set_phy_cfg_data *cfg)
+{
+	if (!pi || !caps || !cfg)
+		return;
+
+	memset(cfg, 0, sizeof(*cfg));
+	cfg->phy_type_low = caps->phy_type_low;
+	cfg->phy_type_high = caps->phy_type_high;
+	cfg->caps = caps->caps;
+	cfg->low_power_ctrl_an = caps->low_power_ctrl_an;
+	cfg->eee_cap = caps->eee_cap;
+	cfg->eeer_value = caps->eeer_value;
+	cfg->link_fec_opt = caps->link_fec_options;
+	cfg->module_compliance_enforcement =
+		caps->module_compliance_enforcement;
+
+	if (ice_fw_supports_link_override(pi->hw)) {
+		struct ice_link_default_override_tlv tlv;
+
+		if (ice_get_link_default_override(&tlv, pi))
+			return;
+
+		if (tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE)
+			cfg->module_compliance_enforcement |=
+				ICE_LINK_OVERRIDE_STRICT_MODE;
+	}
+}
+
+/**
+ * ice_cfg_phy_fec - Configure PHY FEC data based on FEC mode
+ * @pi: port information structure
+ * @cfg: PHY configuration data to set FEC mode
+ * @fec: FEC mode to configure
+ */
+enum ice_status
+ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
+		enum ice_fec_mode fec)
+{
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	enum ice_status status;
+
+	if (!pi || !cfg)
+		return ICE_ERR_BAD_PTR;
+
+	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return ICE_ERR_NO_MEMORY;
+
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps,
+				     NULL);
+	if (status)
+		goto out;
+
+	cfg->caps |= pcaps->caps & ICE_AQC_PHY_EN_AUTO_FEC;
+	cfg->link_fec_opt = pcaps->link_fec_options;
+
+	switch (fec) {
+	case ICE_FEC_BASER:
+		/* Clear RS bits, and AND BASE-R ability
+		 * bits and OR request bits.
+		 */
+		cfg->link_fec_opt &= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN |
+			ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN;
+		cfg->link_fec_opt |= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
+			ICE_AQC_PHY_FEC_25G_KR_REQ;
+		break;
+	case ICE_FEC_RS:
+		/* Clear BASE-R bits, and AND RS ability
+		 * bits and OR request bits.
+		 */
+		cfg->link_fec_opt &= ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN;
+		cfg->link_fec_opt |= ICE_AQC_PHY_FEC_25G_RS_528_REQ |
+			ICE_AQC_PHY_FEC_25G_RS_544_REQ;
+		break;
+	case ICE_FEC_NONE:
+		/* Clear all FEC option bits. */
+		cfg->link_fec_opt &= ~ICE_AQC_PHY_FEC_MASK;
+		break;
+	case ICE_FEC_AUTO:
+		/* AND auto FEC bit, and all caps bits. */
+		cfg->caps &= ICE_AQC_PHY_CAPS_MASK;
+		cfg->link_fec_opt |= pcaps->link_fec_options;
+		break;
+	default:
+		status = ICE_ERR_PARAM;
+		break;
+	}
+
+	if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(pi->hw)) {
+		struct ice_link_default_override_tlv tlv;
+
+		status = ice_get_link_default_override(&tlv, pi);
+		if (status)
+			goto out;
+
+		if (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE) &&
+		    (tlv.options & ICE_LINK_OVERRIDE_EN))
+			cfg->link_fec_opt = tlv.fec_options;
+	}
+
+out:
+	kfree(pcaps);
+
+	return status;
+}
+
+/**
+ * ice_get_link_status - get status of the HW network link
+ * @pi: port information structure
+ * @link_up: pointer to bool (true/false = linkup/linkdown)
+ *
+ * Variable link_up is true if link is up, false if link is down.
+ * The variable link_up is invalid if status is non zero. As a
+ * result of this call, link status reporting becomes enabled
+ */
+enum ice_status ice_get_link_status(struct ice_port_info *pi, bool *link_up)
+{
+	struct ice_phy_info *phy_info;
+	enum ice_status status = 0;
+
+	if (!pi || !link_up)
+		return ICE_ERR_PARAM;
+
+	phy_info = &pi->phy;
+
+	if (phy_info->get_link_info) {
+		status = ice_update_link_info(pi);
+
+		if (status)
+			ice_debug(pi->hw, ICE_DBG_LINK,
+				  "get link status error, status = %d\n",
+				  status);
+	}
+
+	*link_up = phy_info->link_info.link_info & ICE_AQ_LINK_UP;
+
+	return status;
+}
+
+/**
+ * ice_aq_set_link_restart_an
+ * @pi: pointer to the port information structure
+ * @ena_link: if true: enable link, if false: disable link
+ * @cd: pointer to command details structure or NULL
+ *
+ * Sets up the link and restarts the Auto-Negotiation over the link.
+ */
+enum ice_status
+ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
+			   struct ice_sq_cd *cd)
+{
+	struct ice_aqc_restart_an *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.restart_an;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_restart_an);
+
+	cmd->cmd_flags = ICE_AQC_RESTART_AN_LINK_RESTART;
+	cmd->lport_num = pi->lport;
+	if (ena_link)
+		cmd->cmd_flags |= ICE_AQC_RESTART_AN_LINK_ENABLE;
+	else
+		cmd->cmd_flags &= ~ICE_AQC_RESTART_AN_LINK_ENABLE;
+
+	return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_set_event_mask
+ * @hw: pointer to the HW struct
+ * @port_num: port number of the physical function
+ * @mask: event mask to be set
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set event mask (0x0613)
+ */
+enum ice_status
+ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
+		      struct ice_sq_cd *cd)
+{
+	struct ice_aqc_set_event_mask *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.set_event_mask;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_event_mask);
+
+	cmd->lport_num = port_num;
+
+	cmd->event_mask = cpu_to_le16(mask);
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_set_mac_loopback
+ * @hw: pointer to the HW struct
+ * @ena_lpbk: Enable or Disable loopback
+ * @cd: pointer to command details structure or NULL
+ *
+ * Enable/disable loopback on a given port
+ */
+enum ice_status
+ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_set_mac_lb *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.set_mac_lb;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_mac_lb);
+	if (ena_lpbk)
+		cmd->lb_mode = ICE_AQ_MAC_LB_EN;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_set_port_id_led
+ * @pi: pointer to the port information
+ * @is_orig_mode: is this LED set to original mode (by the net-list)
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set LED value for the given port (0x06e9)
+ */
+enum ice_status
+ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
+		       struct ice_sq_cd *cd)
+{
+	struct ice_aqc_set_port_id_led *cmd;
+	struct ice_hw *hw = pi->hw;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.set_port_id_led;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_port_id_led);
+
+	if (is_orig_mode)
+		cmd->ident_mode = ICE_AQC_PORT_IDENT_LED_ORIG;
+	else
+		cmd->ident_mode = ICE_AQC_PORT_IDENT_LED_BLINK;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_sff_eeprom
+ * @hw: pointer to the HW struct
+ * @lport: bits [7:0] = logical port, bit [8] = logical port valid
+ * @bus_addr: I2C bus address of the eeprom (typically 0xA0, 0=topo default)
+ * @mem_addr: I2C offset. lower 8 bits for address, 8 upper bits zero padding.
+ * @page: QSFP page
+ * @set_page: set or ignore the page
+ * @data: pointer to data buffer to be read/written to the I2C device.
+ * @length: 1-16 for read, 1 for write.
+ * @write: 0 read, 1 for write.
+ * @cd: pointer to command details structure or NULL
+ *
+ * Read/Write SFF EEPROM (0x06EE)
+ */
+enum ice_status
+ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr,
+		  u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length,
+		  bool write, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_sff_eeprom *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	if (!data || (mem_addr & 0xff00))
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_sff_eeprom);
+	cmd = &desc.params.read_write_sff_param;
+	desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD | ICE_AQ_FLAG_BUF);
+	cmd->lport_num = (u8)(lport & 0xff);
+	cmd->lport_num_valid = (u8)((lport >> 8) & 0x01);
+	cmd->i2c_bus_addr = cpu_to_le16(((bus_addr >> 1) &
+					 ICE_AQC_SFF_I2CBUS_7BIT_M) |
+					((set_page <<
+					  ICE_AQC_SFF_SET_EEPROM_PAGE_S) &
+					 ICE_AQC_SFF_SET_EEPROM_PAGE_M));
+	cmd->i2c_mem_addr = cpu_to_le16(mem_addr & 0xff);
+	cmd->eeprom_page = cpu_to_le16((u16)page << ICE_AQC_SFF_EEPROM_PAGE_S);
+	if (write)
+		cmd->i2c_bus_addr |= cpu_to_le16(ICE_AQC_SFF_IS_WRITE);
+
+	status = ice_aq_send_cmd(hw, &desc, data, length, cd);
+	return status;
+}
+
+/**
+ * __ice_aq_get_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: VSI FW index
+ * @lut_type: LUT table type
+ * @lut: pointer to the LUT buffer provided by the caller
+ * @lut_size: size of the LUT buffer
+ * @glob_lut_idx: global LUT index
+ * @set: set true to set the table, false to get the table
+ *
+ * Internal function to get (0x0B05) or set (0x0B03) RSS look up table
+ */
+static enum ice_status
+__ice_aq_get_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
+			 u16 lut_size, u8 glob_lut_idx, bool set)
+{
+	struct ice_aqc_get_set_rss_lut *cmd_resp;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+	u16 flags = 0;
+
+	cmd_resp = &desc.params.get_set_rss_lut;
+
+	if (set) {
+		ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_rss_lut);
+		desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	} else {
+		ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_rss_lut);
+	}
+
+	cmd_resp->vsi_id = cpu_to_le16(((vsi_id <<
+					 ICE_AQC_GSET_RSS_LUT_VSI_ID_S) &
+					ICE_AQC_GSET_RSS_LUT_VSI_ID_M) |
+				       ICE_AQC_GSET_RSS_LUT_VSI_VALID);
+
+	switch (lut_type) {
+	case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI:
+	case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF:
+	case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL:
+		flags |= ((lut_type << ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S) &
+			  ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_M);
+		break;
+	default:
+		status = ICE_ERR_PARAM;
+		goto ice_aq_get_set_rss_lut_exit;
+	}
+
+	if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL) {
+		flags |= ((glob_lut_idx << ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S) &
+			  ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_M);
+
+		if (!set)
+			goto ice_aq_get_set_rss_lut_send;
+	} else if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF) {
+		if (!set)
+			goto ice_aq_get_set_rss_lut_send;
+	} else {
+		goto ice_aq_get_set_rss_lut_send;
+	}
+
+	/* LUT size is only valid for Global and PF table types */
+	switch (lut_size) {
+	case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128:
+		break;
+	case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512:
+		flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512_FLAG <<
+			  ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) &
+			 ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M;
+		break;
+	case ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K:
+		if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF) {
+			flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG <<
+				  ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) &
+				 ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M;
+			break;
+		}
+		fallthrough;
+	default:
+		status = ICE_ERR_PARAM;
+		goto ice_aq_get_set_rss_lut_exit;
+	}
+
+ice_aq_get_set_rss_lut_send:
+	cmd_resp->flags = cpu_to_le16(flags);
+	status = ice_aq_send_cmd(hw, &desc, lut, lut_size, NULL);
+
+ice_aq_get_set_rss_lut_exit:
+	return status;
+}
+
+/**
+ * ice_aq_get_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @lut_type: LUT table type
+ * @lut: pointer to the LUT buffer provided by the caller
+ * @lut_size: size of the LUT buffer
+ *
+ * get the RSS lookup table, PF or VSI type
+ */
+enum ice_status
+ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type,
+		   u8 *lut, u16 lut_size)
+{
+	if (!ice_is_vsi_valid(hw, vsi_handle) || !lut)
+		return ICE_ERR_PARAM;
+
+	return __ice_aq_get_set_rss_lut(hw, ice_get_hw_vsi_num(hw, vsi_handle),
+					lut_type, lut, lut_size, 0, false);
+}
+
+/**
+ * ice_aq_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @lut_type: LUT table type
+ * @lut: pointer to the LUT buffer provided by the caller
+ * @lut_size: size of the LUT buffer
+ *
+ * set the RSS lookup table, PF or VSI type
+ */
+enum ice_status
+ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type,
+		   u8 *lut, u16 lut_size)
+{
+	if (!ice_is_vsi_valid(hw, vsi_handle) || !lut)
+		return ICE_ERR_PARAM;
+
+	return __ice_aq_get_set_rss_lut(hw, ice_get_hw_vsi_num(hw, vsi_handle),
+					lut_type, lut, lut_size, 0, true);
+}
+
+/**
+ * __ice_aq_get_set_rss_key
+ * @hw: pointer to the HW struct
+ * @vsi_id: VSI FW index
+ * @key: pointer to key info struct
+ * @set: set true to set the key, false to get the key
+ *
+ * get (0x0B04) or set (0x0B02) the RSS key per VSI
+ */
+static enum
+ice_status __ice_aq_get_set_rss_key(struct ice_hw *hw, u16 vsi_id,
+				    struct ice_aqc_get_set_rss_keys *key,
+				    bool set)
+{
+	struct ice_aqc_get_set_rss_key *cmd_resp;
+	u16 key_size = sizeof(*key);
+	struct ice_aq_desc desc;
+
+	cmd_resp = &desc.params.get_set_rss_key;
+
+	if (set) {
+		ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_rss_key);
+		desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	} else {
+		ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_rss_key);
+	}
+
+	cmd_resp->vsi_id = cpu_to_le16(((vsi_id <<
+					 ICE_AQC_GSET_RSS_KEY_VSI_ID_S) &
+					ICE_AQC_GSET_RSS_KEY_VSI_ID_M) |
+				       ICE_AQC_GSET_RSS_KEY_VSI_VALID);
+
+	return ice_aq_send_cmd(hw, &desc, key, key_size, NULL);
+}
+
+/**
+ * ice_aq_get_rss_key
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ * @key: pointer to key info struct
+ *
+ * get the RSS key per VSI
+ */
+enum ice_status
+ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_handle,
+		   struct ice_aqc_get_set_rss_keys *key)
+{
+	if (!ice_is_vsi_valid(hw, vsi_handle) || !key)
+		return ICE_ERR_PARAM;
+
+	return __ice_aq_get_set_rss_key(hw, ice_get_hw_vsi_num(hw, vsi_handle),
+					key, false);
+}
+
+/**
+ * ice_aq_set_rss_key
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ * @keys: pointer to key info struct
+ *
+ * set the RSS key per VSI
+ */
+enum ice_status
+ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_handle,
+		   struct ice_aqc_get_set_rss_keys *keys)
+{
+	if (!ice_is_vsi_valid(hw, vsi_handle) || !keys)
+		return ICE_ERR_PARAM;
+
+	return __ice_aq_get_set_rss_key(hw, ice_get_hw_vsi_num(hw, vsi_handle),
+					keys, true);
+}
+
+/**
+ * ice_aq_add_lan_txq
+ * @hw: pointer to the hardware structure
+ * @num_qgrps: Number of added queue groups
+ * @qg_list: list of queue groups to be added
+ * @buf_size: size of buffer for indirect command
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add Tx LAN queue (0x0C30)
+ *
+ * NOTE:
+ * Prior to calling add Tx LAN queue:
+ * Initialize the following as part of the Tx queue context:
+ * Completion queue ID if the queue uses Completion queue, Quanta profile,
+ * Cache profile and Packet shaper profile.
+ *
+ * After add Tx LAN queue AQ command is completed:
+ * Interrupts should be associated with specific queues,
+ * Association of Tx queue to Doorbell queue is not part of Add LAN Tx queue
+ * flow.
+ */
+static enum ice_status
+ice_aq_add_lan_txq(struct ice_hw *hw, u8 num_qgrps,
+		   struct ice_aqc_add_tx_qgrp *qg_list, u16 buf_size,
+		   struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_tx_qgrp *list;
+	struct ice_aqc_add_txqs *cmd;
+	struct ice_aq_desc desc;
+	u16 i, sum_size = 0;
+
+	cmd = &desc.params.add_txqs;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_txqs);
+
+	if (!qg_list)
+		return ICE_ERR_PARAM;
+
+	if (num_qgrps > ICE_LAN_TXQ_MAX_QGRPS)
+		return ICE_ERR_PARAM;
+
+	for (i = 0, list = qg_list; i < num_qgrps; i++) {
+		sum_size += struct_size(list, txqs, list->num_txqs);
+		list = (struct ice_aqc_add_tx_qgrp *)(list->txqs +
+						      list->num_txqs);
+	}
+
+	if (buf_size != sum_size)
+		return ICE_ERR_PARAM;
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	cmd->num_qgrps = num_qgrps;
+
+	return ice_aq_send_cmd(hw, &desc, qg_list, buf_size, cd);
+}
+
+/**
+ * ice_aq_dis_lan_txq
+ * @hw: pointer to the hardware structure
+ * @num_qgrps: number of groups in the list
+ * @qg_list: the list of groups to disable
+ * @buf_size: the total size of the qg_list buffer in bytes
+ * @rst_src: if called due to reset, specifies the reset source
+ * @vmvf_num: the relative VM or VF number that is undergoing the reset
+ * @cd: pointer to command details structure or NULL
+ *
+ * Disable LAN Tx queue (0x0C31)
+ */
+static enum ice_status
+ice_aq_dis_lan_txq(struct ice_hw *hw, u8 num_qgrps,
+		   struct ice_aqc_dis_txq_item *qg_list, u16 buf_size,
+		   enum ice_disq_rst_src rst_src, u16 vmvf_num,
+		   struct ice_sq_cd *cd)
+{
+	struct ice_aqc_dis_txq_item *item;
+	struct ice_aqc_dis_txqs *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+	u16 i, sz = 0;
+
+	cmd = &desc.params.dis_txqs;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_dis_txqs);
+
+	/* qg_list can be NULL only in VM/VF reset flow */
+	if (!qg_list && !rst_src)
+		return ICE_ERR_PARAM;
+
+	if (num_qgrps > ICE_LAN_TXQ_MAX_QGRPS)
+		return ICE_ERR_PARAM;
+
+	cmd->num_entries = num_qgrps;
+
+	cmd->vmvf_and_timeout = cpu_to_le16((5 << ICE_AQC_Q_DIS_TIMEOUT_S) &
+					    ICE_AQC_Q_DIS_TIMEOUT_M);
+
+	switch (rst_src) {
+	case ICE_VM_RESET:
+		cmd->cmd_type = ICE_AQC_Q_DIS_CMD_VM_RESET;
+		cmd->vmvf_and_timeout |=
+			cpu_to_le16(vmvf_num & ICE_AQC_Q_DIS_VMVF_NUM_M);
+		break;
+	case ICE_VF_RESET:
+		cmd->cmd_type = ICE_AQC_Q_DIS_CMD_VF_RESET;
+		/* In this case, FW expects vmvf_num to be absolute VF ID */
+		cmd->vmvf_and_timeout |=
+			cpu_to_le16((vmvf_num + hw->func_caps.vf_base_id) &
+				    ICE_AQC_Q_DIS_VMVF_NUM_M);
+		break;
+	case ICE_NO_RESET:
+	default:
+		break;
+	}
+
+	/* flush pipe on time out */
+	cmd->cmd_type |= ICE_AQC_Q_DIS_CMD_FLUSH_PIPE;
+	/* If no queue group info, we are in a reset flow. Issue the AQ */
+	if (!qg_list)
+		goto do_aq;
+
+	/* set RD bit to indicate that command buffer is provided by the driver
+	 * and it needs to be read by the firmware
+	 */
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	for (i = 0, item = qg_list; i < num_qgrps; i++) {
+		u16 item_size = struct_size(item, q_id, item->num_qs);
+
+		/* If the num of queues is even, add 2 bytes of padding */
+		if ((item->num_qs % 2) == 0)
+			item_size += 2;
+
+		sz += item_size;
+
+		item = (struct ice_aqc_dis_txq_item *)((u8 *)item + item_size);
+	}
+
+	if (buf_size != sz)
+		return ICE_ERR_PARAM;
+
+do_aq:
+	status = ice_aq_send_cmd(hw, &desc, qg_list, buf_size, cd);
+	if (status) {
+		if (!qg_list)
+			ice_debug(hw, ICE_DBG_SCHED, "VM%d disable failed %d\n",
+				  vmvf_num, hw->adminq.sq_last_status);
+		else
+			ice_debug(hw, ICE_DBG_SCHED, "disable queue %d failed %d\n",
+				  le16_to_cpu(qg_list[0].q_id[0]),
+				  hw->adminq.sq_last_status);
+	}
+	return status;
+}
+
+/* End of FW Admin Queue command wrappers */
+
+/**
+ * ice_write_byte - write a byte to a packed context structure
+ * @src_ctx:  the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info:  a description of the struct to be filled
+ */
+static void
+ice_write_byte(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+{
+	u8 src_byte, dest_byte, mask;
+	u8 *from, *dest;
+	u16 shift_width;
+
+	/* copy from the next struct field */
+	from = src_ctx + ce_info->offset;
+
+	/* prepare the bits and mask */
+	shift_width = ce_info->lsb % 8;
+	mask = (u8)(BIT(ce_info->width) - 1);
+
+	src_byte = *from;
+	src_byte &= mask;
+
+	/* shift to correct alignment */
+	mask <<= shift_width;
+	src_byte <<= shift_width;
+
+	/* get the current bits from the target bit string */
+	dest = dest_ctx + (ce_info->lsb / 8);
+
+	memcpy(&dest_byte, dest, sizeof(dest_byte));
+
+	dest_byte &= ~mask;	/* get the bits not changing */
+	dest_byte |= src_byte;	/* add in the new bits */
+
+	/* put it all back */
+	memcpy(dest, &dest_byte, sizeof(dest_byte));
+}
+
+/**
+ * ice_write_word - write a word to a packed context structure
+ * @src_ctx:  the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info:  a description of the struct to be filled
+ */
+static void
+ice_write_word(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+{
+	u16 src_word, mask;
+	__le16 dest_word;
+	u8 *from, *dest;
+	u16 shift_width;
+
+	/* copy from the next struct field */
+	from = src_ctx + ce_info->offset;
+
+	/* prepare the bits and mask */
+	shift_width = ce_info->lsb % 8;
+	mask = BIT(ce_info->width) - 1;
+
+	/* don't swizzle the bits until after the mask because the mask bits
+	 * will be in a different bit position on big endian machines
+	 */
+	src_word = *(u16 *)from;
+	src_word &= mask;
+
+	/* shift to correct alignment */
+	mask <<= shift_width;
+	src_word <<= shift_width;
+
+	/* get the current bits from the target bit string */
+	dest = dest_ctx + (ce_info->lsb / 8);
+
+	memcpy(&dest_word, dest, sizeof(dest_word));
+
+	dest_word &= ~(cpu_to_le16(mask));	/* get the bits not changing */
+	dest_word |= cpu_to_le16(src_word);	/* add in the new bits */
+
+	/* put it all back */
+	memcpy(dest, &dest_word, sizeof(dest_word));
+}
+
+/**
+ * ice_write_dword - write a dword to a packed context structure
+ * @src_ctx:  the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info:  a description of the struct to be filled
+ */
+static void
+ice_write_dword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+{
+	u32 src_dword, mask;
+	__le32 dest_dword;
+	u8 *from, *dest;
+	u16 shift_width;
+
+	/* copy from the next struct field */
+	from = src_ctx + ce_info->offset;
+
+	/* prepare the bits and mask */
+	shift_width = ce_info->lsb % 8;
+
+	/* if the field width is exactly 32 on an x86 machine, then the shift
+	 * operation will not work because the SHL instructions count is masked
+	 * to 5 bits so the shift will do nothing
+	 */
+	if (ce_info->width < 32)
+		mask = BIT(ce_info->width) - 1;
+	else
+		mask = (u32)~0;
+
+	/* don't swizzle the bits until after the mask because the mask bits
+	 * will be in a different bit position on big endian machines
+	 */
+	src_dword = *(u32 *)from;
+	src_dword &= mask;
+
+	/* shift to correct alignment */
+	mask <<= shift_width;
+	src_dword <<= shift_width;
+
+	/* get the current bits from the target bit string */
+	dest = dest_ctx + (ce_info->lsb / 8);
+
+	memcpy(&dest_dword, dest, sizeof(dest_dword));
+
+	dest_dword &= ~(cpu_to_le32(mask));	/* get the bits not changing */
+	dest_dword |= cpu_to_le32(src_dword);	/* add in the new bits */
+
+	/* put it all back */
+	memcpy(dest, &dest_dword, sizeof(dest_dword));
+}
+
+/**
+ * ice_write_qword - write a qword to a packed context structure
+ * @src_ctx:  the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info:  a description of the struct to be filled
+ */
+static void
+ice_write_qword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+{
+	u64 src_qword, mask;
+	__le64 dest_qword;
+	u8 *from, *dest;
+	u16 shift_width;
+
+	/* copy from the next struct field */
+	from = src_ctx + ce_info->offset;
+
+	/* prepare the bits and mask */
+	shift_width = ce_info->lsb % 8;
+
+	/* if the field width is exactly 64 on an x86 machine, then the shift
+	 * operation will not work because the SHL instructions count is masked
+	 * to 6 bits so the shift will do nothing
+	 */
+	if (ce_info->width < 64)
+		mask = BIT_ULL(ce_info->width) - 1;
+	else
+		mask = (u64)~0;
+
+	/* don't swizzle the bits until after the mask because the mask bits
+	 * will be in a different bit position on big endian machines
+	 */
+	src_qword = *(u64 *)from;
+	src_qword &= mask;
+
+	/* shift to correct alignment */
+	mask <<= shift_width;
+	src_qword <<= shift_width;
+
+	/* get the current bits from the target bit string */
+	dest = dest_ctx + (ce_info->lsb / 8);
+
+	memcpy(&dest_qword, dest, sizeof(dest_qword));
+
+	dest_qword &= ~(cpu_to_le64(mask));	/* get the bits not changing */
+	dest_qword |= cpu_to_le64(src_qword);	/* add in the new bits */
+
+	/* put it all back */
+	memcpy(dest, &dest_qword, sizeof(dest_qword));
+}
+
+/**
+ * ice_set_ctx - set context bits in packed structure
+ * @hw: pointer to the hardware structure
+ * @src_ctx:  pointer to a generic non-packed context structure
+ * @dest_ctx: pointer to memory for the packed structure
+ * @ce_info:  a description of the structure to be transformed
+ */
+enum ice_status
+ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx,
+	    const struct ice_ctx_ele *ce_info)
+{
+	int f;
+
+	for (f = 0; ce_info[f].width; f++) {
+		/* We have to deal with each element of the FW response
+		 * using the correct size so that we are correct regardless
+		 * of the endianness of the machine.
+		 */
+		if (ce_info[f].width > (ce_info[f].size_of * BITS_PER_BYTE)) {
+			ice_debug(hw, ICE_DBG_QCTX,
+				  "Field %d width of %d bits larger than size of %d byte(s) ... skipping write\n",
+				  f, ce_info[f].width, ce_info[f].size_of);
+			continue;
+		}
+		switch (ce_info[f].size_of) {
+		case sizeof(u8):
+			ice_write_byte(src_ctx, dest_ctx, &ce_info[f]);
+			break;
+		case sizeof(u16):
+			ice_write_word(src_ctx, dest_ctx, &ce_info[f]);
+			break;
+		case sizeof(u32):
+			ice_write_dword(src_ctx, dest_ctx, &ce_info[f]);
+			break;
+		case sizeof(u64):
+			ice_write_qword(src_ctx, dest_ctx, &ce_info[f]);
+			break;
+		default:
+			return ICE_ERR_INVAL_SIZE;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_lan_q_ctx - get the LAN queue context for the given VSI and TC
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @q_handle: software queue handle
+ */
+struct ice_q_ctx *
+ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle)
+{
+	struct ice_vsi_ctx *vsi;
+	struct ice_q_ctx *q_ctx;
+
+	vsi = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!vsi)
+		return NULL;
+	if (q_handle >= vsi->num_lan_q_entries[tc])
+		return NULL;
+	if (!vsi->lan_q_ctx[tc])
+		return NULL;
+	q_ctx = vsi->lan_q_ctx[tc];
+	return &q_ctx[q_handle];
+}
+
+/**
+ * ice_ena_vsi_txq
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @q_handle: software queue handle
+ * @num_qgrps: Number of added queue groups
+ * @buf: list of queue groups to be added
+ * @buf_size: size of buffer for indirect command
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function adds one LAN queue
+ */
+enum ice_status
+ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle,
+		u8 num_qgrps, struct ice_aqc_add_tx_qgrp *buf, u16 buf_size,
+		struct ice_sq_cd *cd)
+{
+	struct ice_aqc_txsched_elem_data node = { 0 };
+	struct ice_sched_node *parent;
+	struct ice_q_ctx *q_ctx;
+	enum ice_status status;
+	struct ice_hw *hw;
+
+	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+		return ICE_ERR_CFG;
+
+	if (num_qgrps > 1 || buf->num_txqs > 1)
+		return ICE_ERR_MAX_LIMIT;
+
+	hw = pi->hw;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+
+	mutex_lock(&pi->sched_lock);
+
+	q_ctx = ice_get_lan_q_ctx(hw, vsi_handle, tc, q_handle);
+	if (!q_ctx) {
+		ice_debug(hw, ICE_DBG_SCHED, "Enaq: invalid queue handle %d\n",
+			  q_handle);
+		status = ICE_ERR_PARAM;
+		goto ena_txq_exit;
+	}
+
+	/* find a parent node */
+	parent = ice_sched_get_free_qparent(pi, vsi_handle, tc,
+					    ICE_SCHED_NODE_OWNER_LAN);
+	if (!parent) {
+		status = ICE_ERR_PARAM;
+		goto ena_txq_exit;
+	}
+
+	buf->parent_teid = parent->info.node_teid;
+	node.parent_teid = parent->info.node_teid;
+	/* Mark that the values in the "generic" section as valid. The default
+	 * value in the "generic" section is zero. This means that :
+	 * - Scheduling mode is Bytes Per Second (BPS), indicated by Bit 0.
+	 * - 0 priority among siblings, indicated by Bit 1-3.
+	 * - WFQ, indicated by Bit 4.
+	 * - 0 Adjustment value is used in PSM credit update flow, indicated by
+	 * Bit 5-6.
+	 * - Bit 7 is reserved.
+	 * Without setting the generic section as valid in valid_sections, the
+	 * Admin queue command will fail with error code ICE_AQ_RC_EINVAL.
+	 */
+	buf->txqs[0].info.valid_sections =
+		ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
+		ICE_AQC_ELEM_VALID_EIR;
+	buf->txqs[0].info.generic = 0;
+	buf->txqs[0].info.cir_bw.bw_profile_idx =
+		cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+	buf->txqs[0].info.cir_bw.bw_alloc =
+		cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+	buf->txqs[0].info.eir_bw.bw_profile_idx =
+		cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+	buf->txqs[0].info.eir_bw.bw_alloc =
+		cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+
+	/* add the LAN queue */
+	status = ice_aq_add_lan_txq(hw, num_qgrps, buf, buf_size, cd);
+	if (status) {
+		ice_debug(hw, ICE_DBG_SCHED, "enable queue %d failed %d\n",
+			  le16_to_cpu(buf->txqs[0].txq_id),
+			  hw->adminq.sq_last_status);
+		goto ena_txq_exit;
+	}
+
+	node.node_teid = buf->txqs[0].q_teid;
+	node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF;
+	q_ctx->q_handle = q_handle;
+	q_ctx->q_teid = le32_to_cpu(node.node_teid);
+
+	/* add a leaf node into scheduler tree queue layer */
+	status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node);
+	if (!status)
+		status = ice_sched_replay_q_bw(pi, q_ctx);
+
+ena_txq_exit:
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
+
+/**
+ * ice_dis_vsi_txq
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @num_queues: number of queues
+ * @q_handles: pointer to software queue handle array
+ * @q_ids: pointer to the q_id array
+ * @q_teids: pointer to queue node teids
+ * @rst_src: if called due to reset, specifies the reset source
+ * @vmvf_num: the relative VM or VF number that is undergoing the reset
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function removes queues and their corresponding nodes in SW DB
+ */
+enum ice_status
+ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues,
+		u16 *q_handles, u16 *q_ids, u32 *q_teids,
+		enum ice_disq_rst_src rst_src, u16 vmvf_num,
+		struct ice_sq_cd *cd)
+{
+	enum ice_status status = ICE_ERR_DOES_NOT_EXIST;
+	struct ice_aqc_dis_txq_item *qg_list;
+	struct ice_q_ctx *q_ctx;
+	struct ice_hw *hw;
+	u16 i, buf_size;
+
+	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+		return ICE_ERR_CFG;
+
+	hw = pi->hw;
+
+	if (!num_queues) {
+		/* if queue is disabled already yet the disable queue command
+		 * has to be sent to complete the VF reset, then call
+		 * ice_aq_dis_lan_txq without any queue information
+		 */
+		if (rst_src)
+			return ice_aq_dis_lan_txq(hw, 0, NULL, 0, rst_src,
+						  vmvf_num, NULL);
+		return ICE_ERR_CFG;
+	}
+
+	buf_size = struct_size(qg_list, q_id, 1);
+	qg_list = kzalloc(buf_size, GFP_KERNEL);
+	if (!qg_list)
+		return ICE_ERR_NO_MEMORY;
+
+	mutex_lock(&pi->sched_lock);
+
+	for (i = 0; i < num_queues; i++) {
+		struct ice_sched_node *node;
+
+		node = ice_sched_find_node_by_teid(pi->root, q_teids[i]);
+		if (!node)
+			continue;
+		q_ctx = ice_get_lan_q_ctx(hw, vsi_handle, tc, q_handles[i]);
+		if (!q_ctx) {
+			ice_debug(hw, ICE_DBG_SCHED, "invalid queue handle%d\n",
+				  q_handles[i]);
+			continue;
+		}
+		if (q_ctx->q_handle != q_handles[i]) {
+			ice_debug(hw, ICE_DBG_SCHED, "Err:handles %d %d\n",
+				  q_ctx->q_handle, q_handles[i]);
+			continue;
+		}
+		qg_list->parent_teid = node->info.parent_teid;
+		qg_list->num_qs = 1;
+		qg_list->q_id[0] = cpu_to_le16(q_ids[i]);
+		status = ice_aq_dis_lan_txq(hw, 1, qg_list, buf_size, rst_src,
+					    vmvf_num, cd);
+
+		if (status)
+			break;
+		ice_free_sched_node(pi, node);
+		q_ctx->q_handle = ICE_INVAL_Q_HANDLE;
+	}
+	mutex_unlock(&pi->sched_lock);
+	kfree(qg_list);
+	return status;
+}
+
+/**
+ * ice_cfg_vsi_qs - configure the new/existing VSI queues
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap
+ * @maxqs: max queues array per TC
+ * @owner: LAN or RDMA
+ *
+ * This function adds/updates the VSI queues per TC.
+ */
+static enum ice_status
+ice_cfg_vsi_qs(struct ice_port_info *pi, u16 vsi_handle, u8 tc_bitmap,
+	       u16 *maxqs, u8 owner)
+{
+	enum ice_status status = 0;
+	u8 i;
+
+	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+		return ICE_ERR_CFG;
+
+	if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+		return ICE_ERR_PARAM;
+
+	mutex_lock(&pi->sched_lock);
+
+	ice_for_each_traffic_class(i) {
+		/* configuration is possible only if TC node is present */
+		if (!ice_sched_get_tc_node(pi, i))
+			continue;
+
+		status = ice_sched_cfg_vsi(pi, vsi_handle, i, maxqs[i], owner,
+					   ice_is_tc_ena(tc_bitmap, i));
+		if (status)
+			break;
+	}
+
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
+
+/**
+ * ice_cfg_vsi_lan - configure VSI LAN queues
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap
+ * @max_lanqs: max LAN queues array per TC
+ *
+ * This function adds/updates the VSI LAN queues per TC.
+ */
+enum ice_status
+ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_handle, u8 tc_bitmap,
+		u16 *max_lanqs)
+{
+	return ice_cfg_vsi_qs(pi, vsi_handle, tc_bitmap, max_lanqs,
+			      ICE_SCHED_NODE_OWNER_LAN);
+}
+
+/**
+ * ice_replay_pre_init - replay pre initialization
+ * @hw: pointer to the HW struct
+ *
+ * Initializes required config data for VSI, FD, ACL, and RSS before replay.
+ */
+static enum ice_status ice_replay_pre_init(struct ice_hw *hw)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	u8 i;
+
+	/* Delete old entries from replay filter list head if there is any */
+	ice_rm_all_sw_replay_rule_info(hw);
+	/* In start of replay, move entries into replay_rules list, it
+	 * will allow adding rules entries back to filt_rules list,
+	 * which is operational list.
+	 */
+	for (i = 0; i < ICE_SW_LKUP_LAST; i++)
+		list_replace_init(&sw->recp_list[i].filt_rules,
+				  &sw->recp_list[i].filt_replay_rules);
+
+	return 0;
+}
+
+/**
+ * ice_replay_vsi - replay VSI configuration
+ * @hw: pointer to the HW struct
+ * @vsi_handle: driver VSI handle
+ *
+ * Restore all VSI configuration after reset. It is required to call this
+ * function with main VSI first.
+ */
+enum ice_status ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle)
+{
+	enum ice_status status;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+
+	/* Replay pre-initialization if there is any */
+	if (vsi_handle == ICE_MAIN_VSI_HANDLE) {
+		status = ice_replay_pre_init(hw);
+		if (status)
+			return status;
+	}
+	/* Replay per VSI all RSS configurations */
+	status = ice_replay_rss_cfg(hw, vsi_handle);
+	if (status)
+		return status;
+	/* Replay per VSI all filters */
+	status = ice_replay_vsi_all_fltr(hw, vsi_handle);
+	return status;
+}
+
+/**
+ * ice_replay_post - post replay configuration cleanup
+ * @hw: pointer to the HW struct
+ *
+ * Post replay cleanup.
+ */
+void ice_replay_post(struct ice_hw *hw)
+{
+	/* Delete old entries from replay filter list head */
+	ice_rm_all_sw_replay_rule_info(hw);
+}
+
+/**
+ * ice_stat_update40 - read 40 bit stat from the chip and update stat values
+ * @hw: ptr to the hardware info
+ * @reg: offset of 64 bit HW register to read from
+ * @prev_stat_loaded: bool to specify if previous stats are loaded
+ * @prev_stat: ptr to previous loaded stat value
+ * @cur_stat: ptr to current stat value
+ */
+void
+ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+		  u64 *prev_stat, u64 *cur_stat)
+{
+	u64 new_data = rd64(hw, reg) & (BIT_ULL(40) - 1);
+
+	/* device stats are not reset at PFR, they likely will not be zeroed
+	 * when the driver starts. Thus, save the value from the first read
+	 * without adding to the statistic value so that we report stats which
+	 * count up from zero.
+	 */
+	if (!prev_stat_loaded) {
+		*prev_stat = new_data;
+		return;
+	}
+
+	/* Calculate the difference between the new and old values, and then
+	 * add it to the software stat value.
+	 */
+	if (new_data >= *prev_stat)
+		*cur_stat += new_data - *prev_stat;
+	else
+		/* to manage the potential roll-over */
+		*cur_stat += (new_data + BIT_ULL(40)) - *prev_stat;
+
+	/* Update the previously stored value to prepare for next read */
+	*prev_stat = new_data;
+}
+
+/**
+ * ice_stat_update32 - read 32 bit stat from the chip and update stat values
+ * @hw: ptr to the hardware info
+ * @reg: offset of HW register to read from
+ * @prev_stat_loaded: bool to specify if previous stats are loaded
+ * @prev_stat: ptr to previous loaded stat value
+ * @cur_stat: ptr to current stat value
+ */
+void
+ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+		  u64 *prev_stat, u64 *cur_stat)
+{
+	u32 new_data;
+
+	new_data = rd32(hw, reg);
+
+	/* device stats are not reset at PFR, they likely will not be zeroed
+	 * when the driver starts. Thus, save the value from the first read
+	 * without adding to the statistic value so that we report stats which
+	 * count up from zero.
+	 */
+	if (!prev_stat_loaded) {
+		*prev_stat = new_data;
+		return;
+	}
+
+	/* Calculate the difference between the new and old values, and then
+	 * add it to the software stat value.
+	 */
+	if (new_data >= *prev_stat)
+		*cur_stat += new_data - *prev_stat;
+	else
+		/* to manage the potential roll-over */
+		*cur_stat += (new_data + BIT_ULL(32)) - *prev_stat;
+
+	/* Update the previously stored value to prepare for next read */
+	*prev_stat = new_data;
+}
+
+/**
+ * ice_sched_query_elem - query element information from HW
+ * @hw: pointer to the HW struct
+ * @node_teid: node TEID to be queried
+ * @buf: buffer to element information
+ *
+ * This function queries HW element information
+ */
+enum ice_status
+ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
+		     struct ice_aqc_txsched_elem_data *buf)
+{
+	u16 buf_size, num_elem_ret = 0;
+	enum ice_status status;
+
+	buf_size = sizeof(*buf);
+	memset(buf, 0, buf_size);
+	buf->node_teid = cpu_to_le32(node_teid);
+	status = ice_aq_query_sched_elems(hw, 1, buf, buf_size, &num_elem_ret,
+					  NULL);
+	if (status || num_elem_ret != 1)
+		ice_debug(hw, ICE_DBG_SCHED, "query element failed\n");
+	return status;
+}
+
+/**
+ * ice_fw_supports_link_override
+ * @hw: pointer to the hardware structure
+ *
+ * Checks if the firmware supports link override
+ */
+bool ice_fw_supports_link_override(struct ice_hw *hw)
+{
+	/* Currently, only supported for E810 devices */
+	if (hw->mac_type != ICE_MAC_E810)
+		return false;
+
+	if (hw->api_maj_ver == ICE_FW_API_LINK_OVERRIDE_MAJ) {
+		if (hw->api_min_ver > ICE_FW_API_LINK_OVERRIDE_MIN)
+			return true;
+		if (hw->api_min_ver == ICE_FW_API_LINK_OVERRIDE_MIN &&
+		    hw->api_patch >= ICE_FW_API_LINK_OVERRIDE_PATCH)
+			return true;
+	} else if (hw->api_maj_ver > ICE_FW_API_LINK_OVERRIDE_MAJ) {
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_get_link_default_override
+ * @ldo: pointer to the link default override struct
+ * @pi: pointer to the port info struct
+ *
+ * Gets the link default override for a port
+ */
+enum ice_status
+ice_get_link_default_override(struct ice_link_default_override_tlv *ldo,
+			      struct ice_port_info *pi)
+{
+	u16 i, tlv, tlv_len, tlv_start, buf, offset;
+	struct ice_hw *hw = pi->hw;
+	enum ice_status status;
+
+	status = ice_get_pfa_module_tlv(hw, &tlv, &tlv_len,
+					ICE_SR_LINK_DEFAULT_OVERRIDE_PTR);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Failed to read link override TLV.\n");
+		return status;
+	}
+
+	/* Each port has its own config; calculate for our port */
+	tlv_start = tlv + pi->lport * ICE_SR_PFA_LINK_OVERRIDE_WORDS +
+		ICE_SR_PFA_LINK_OVERRIDE_OFFSET;
+
+	/* link options first */
+	status = ice_read_sr_word(hw, tlv_start, &buf);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Failed to read override link options.\n");
+		return status;
+	}
+	ldo->options = buf & ICE_LINK_OVERRIDE_OPT_M;
+	ldo->phy_config = (buf & ICE_LINK_OVERRIDE_PHY_CFG_M) >>
+		ICE_LINK_OVERRIDE_PHY_CFG_S;
+
+	/* link PHY config */
+	offset = tlv_start + ICE_SR_PFA_LINK_OVERRIDE_FEC_OFFSET;
+	status = ice_read_sr_word(hw, offset, &buf);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Failed to read override phy config.\n");
+		return status;
+	}
+	ldo->fec_options = buf & ICE_LINK_OVERRIDE_FEC_OPT_M;
+
+	/* PHY types low */
+	offset = tlv_start + ICE_SR_PFA_LINK_OVERRIDE_PHY_OFFSET;
+	for (i = 0; i < ICE_SR_PFA_LINK_OVERRIDE_PHY_WORDS; i++) {
+		status = ice_read_sr_word(hw, (offset + i), &buf);
+		if (status) {
+			ice_debug(hw, ICE_DBG_INIT,
+				  "Failed to read override link options.\n");
+			return status;
+		}
+		/* shift 16 bits at a time to fill 64 bits */
+		ldo->phy_type_low |= ((u64)buf << (i * 16));
+	}
+
+	/* PHY types high */
+	offset = tlv_start + ICE_SR_PFA_LINK_OVERRIDE_PHY_OFFSET +
+		ICE_SR_PFA_LINK_OVERRIDE_PHY_WORDS;
+	for (i = 0; i < ICE_SR_PFA_LINK_OVERRIDE_PHY_WORDS; i++) {
+		status = ice_read_sr_word(hw, (offset + i), &buf);
+		if (status) {
+			ice_debug(hw, ICE_DBG_INIT,
+				  "Failed to read override link options.\n");
+			return status;
+		}
+		/* shift 16 bits at a time to fill 64 bits */
+		ldo->phy_type_high |= ((u64)buf << (i * 16));
+	}
+
+	return status;
+}
+
+/**
+ * ice_is_phy_caps_an_enabled - check if PHY capabilities autoneg is enabled
+ * @caps: get PHY capability data
+ */
+bool ice_is_phy_caps_an_enabled(struct ice_aqc_get_phy_caps_data *caps)
+{
+	if (caps->caps & ICE_AQC_PHY_AN_MODE ||
+	    caps->low_power_ctrl_an & (ICE_AQC_PHY_AN_EN_CLAUSE28 |
+				       ICE_AQC_PHY_AN_EN_CLAUSE73 |
+				       ICE_AQC_PHY_AN_EN_CLAUSE37))
+		return true;
+
+	return false;
+}
+
+/**
+ * ice_aq_set_lldp_mib - Set the LLDP MIB
+ * @hw: pointer to the HW struct
+ * @mib_type: Local, Remote or both Local and Remote MIBs
+ * @buf: pointer to the caller-supplied buffer to store the MIB block
+ * @buf_size: size of the buffer (in bytes)
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set the LLDP MIB. (0x0A08)
+ */
+enum ice_status
+ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
+		    struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_set_local_mib *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.lldp_set_mib;
+
+	if (buf_size == 0 || !buf)
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_set_local_mib);
+
+	desc.flags |= cpu_to_le16((u16)ICE_AQ_FLAG_RD);
+	desc.datalen = cpu_to_le16(buf_size);
+
+	cmd->type = mib_type;
+	cmd->length = cpu_to_le16(buf_size);
+
+	return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
new file mode 100644
index 000000000..3ebb97387
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_COMMON_H_
+#define _ICE_COMMON_H_
+
+#include "ice.h"
+#include "ice_type.h"
+#include "ice_nvm.h"
+#include "ice_flex_pipe.h"
+#include "ice_switch.h"
+#include <linux/avf/virtchnl.h>
+
+enum ice_status ice_init_hw(struct ice_hw *hw);
+void ice_deinit_hw(struct ice_hw *hw);
+enum ice_status ice_check_reset(struct ice_hw *hw);
+enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req);
+enum ice_status ice_create_all_ctrlq(struct ice_hw *hw);
+enum ice_status ice_init_all_ctrlq(struct ice_hw *hw);
+void ice_shutdown_all_ctrlq(struct ice_hw *hw);
+void ice_destroy_all_ctrlq(struct ice_hw *hw);
+enum ice_status
+ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+		  struct ice_rq_event_info *e, u16 *pending);
+enum ice_status
+ice_get_link_status(struct ice_port_info *pi, bool *link_up);
+enum ice_status ice_update_link_info(struct ice_port_info *pi);
+enum ice_status
+ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
+		enum ice_aq_res_access_type access, u32 timeout);
+void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res);
+enum ice_status
+ice_alloc_hw_res(struct ice_hw *hw, u16 type, u16 num, bool btm, u16 *res);
+enum ice_status
+ice_free_hw_res(struct ice_hw *hw, u16 type, u16 num, u16 *res);
+enum ice_status
+ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries,
+		      struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size,
+		      enum ice_adminq_opc opc, struct ice_sq_cd *cd);
+enum ice_status
+ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+		struct ice_aq_desc *desc, void *buf, u16 buf_size,
+		struct ice_sq_cd *cd);
+void ice_clear_pxe_mode(struct ice_hw *hw);
+enum ice_status ice_get_caps(struct ice_hw *hw);
+
+void ice_set_safe_mode_caps(struct ice_hw *hw);
+
+enum ice_status
+ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
+		  u32 rxq_index);
+
+enum ice_status
+ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, u8 *lut,
+		   u16 lut_size);
+enum ice_status
+ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, u8 *lut,
+		   u16 lut_size);
+enum ice_status
+ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_handle,
+		   struct ice_aqc_get_set_rss_keys *keys);
+enum ice_status
+ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_handle,
+		   struct ice_aqc_get_set_rss_keys *keys);
+
+bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq);
+enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
+void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
+extern const struct ice_ctx_ele ice_tlan_ctx_info[];
+enum ice_status
+ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx,
+	    const struct ice_ctx_ele *ce_info);
+
+extern struct mutex ice_global_cfg_lock_sw;
+
+enum ice_status
+ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc,
+		void *buf, u16 buf_size, struct ice_sq_cd *cd);
+enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd);
+
+enum ice_status
+ice_aq_send_driver_ver(struct ice_hw *hw, struct ice_driver_ver *dv,
+		       struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
+		    struct ice_aqc_get_phy_caps_data *caps,
+		    struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_list_caps(struct ice_hw *hw, void *buf, u16 buf_size, u32 *cap_count,
+		 enum ice_adminq_opc opc, struct ice_sq_cd *cd);
+enum ice_status
+ice_discover_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_caps);
+void
+ice_update_phy_type(u64 *phy_type_low, u64 *phy_type_high,
+		    u16 link_speeds_bitmap);
+enum ice_status
+ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags,
+			struct ice_sq_cd *cd);
+enum ice_status ice_clear_pf_cfg(struct ice_hw *hw);
+enum ice_status
+ice_aq_set_phy_cfg(struct ice_hw *hw, struct ice_port_info *pi,
+		   struct ice_aqc_set_phy_cfg_data *cfg, struct ice_sq_cd *cd);
+bool ice_fw_supports_link_override(struct ice_hw *hw);
+enum ice_status
+ice_get_link_default_override(struct ice_link_default_override_tlv *ldo,
+			      struct ice_port_info *pi);
+bool ice_is_phy_caps_an_enabled(struct ice_aqc_get_phy_caps_data *caps);
+
+enum ice_fc_mode ice_caps_to_fc_mode(u8 caps);
+enum ice_fec_mode ice_caps_to_fec_mode(u8 caps, u8 fec_options);
+enum ice_status
+ice_set_fc(struct ice_port_info *pi, u8 *aq_failures,
+	   bool ena_auto_link_update);
+enum ice_status
+ice_cfg_phy_fc(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
+	       enum ice_fc_mode fc);
+bool
+ice_phy_caps_equals_cfg(struct ice_aqc_get_phy_caps_data *caps,
+			struct ice_aqc_set_phy_cfg_data *cfg);
+void
+ice_copy_phy_caps_to_cfg(struct ice_port_info *pi,
+			 struct ice_aqc_get_phy_caps_data *caps,
+			 struct ice_aqc_set_phy_cfg_data *cfg);
+enum ice_status
+ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
+		enum ice_fec_mode fec);
+enum ice_status
+ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
+			   struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_set_mac_cfg(struct ice_hw *hw, u16 max_frame_size, struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
+		     struct ice_link_status *link, struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
+		      struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd);
+
+enum ice_status
+ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
+		       struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr,
+		  u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length,
+		  bool write, struct ice_sq_cd *cd);
+
+enum ice_status
+ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues,
+		u16 *q_handle, u16 *q_ids, u32 *q_teids,
+		enum ice_disq_rst_src rst_src, u16 vmvf_num,
+		struct ice_sq_cd *cd);
+enum ice_status
+ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_handle, u8 tc_bitmap,
+		u16 *max_lanqs);
+enum ice_status
+ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle,
+		u8 num_qgrps, struct ice_aqc_add_tx_qgrp *buf, u16 buf_size,
+		struct ice_sq_cd *cd);
+enum ice_status ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle);
+void ice_replay_post(struct ice_hw *hw);
+void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf);
+struct ice_q_ctx *
+ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle);
+void
+ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+		  u64 *prev_stat, u64 *cur_stat);
+void
+ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+		  u64 *prev_stat, u64 *cur_stat);
+enum ice_status
+ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
+		     struct ice_aqc_txsched_elem_data *buf);
+enum ice_status
+ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
+		    struct ice_sq_cd *cd);
+#endif /* _ICE_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
new file mode 100644
index 000000000..1f46a7828
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -0,0 +1,1198 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_common.h"
+
+#define ICE_CQ_INIT_REGS(qinfo, prefix)				\
+do {								\
+	(qinfo)->sq.head = prefix##_ATQH;			\
+	(qinfo)->sq.tail = prefix##_ATQT;			\
+	(qinfo)->sq.len = prefix##_ATQLEN;			\
+	(qinfo)->sq.bah = prefix##_ATQBAH;			\
+	(qinfo)->sq.bal = prefix##_ATQBAL;			\
+	(qinfo)->sq.len_mask = prefix##_ATQLEN_ATQLEN_M;	\
+	(qinfo)->sq.len_ena_mask = prefix##_ATQLEN_ATQENABLE_M;	\
+	(qinfo)->sq.len_crit_mask = prefix##_ATQLEN_ATQCRIT_M;	\
+	(qinfo)->sq.head_mask = prefix##_ATQH_ATQH_M;		\
+	(qinfo)->rq.head = prefix##_ARQH;			\
+	(qinfo)->rq.tail = prefix##_ARQT;			\
+	(qinfo)->rq.len = prefix##_ARQLEN;			\
+	(qinfo)->rq.bah = prefix##_ARQBAH;			\
+	(qinfo)->rq.bal = prefix##_ARQBAL;			\
+	(qinfo)->rq.len_mask = prefix##_ARQLEN_ARQLEN_M;	\
+	(qinfo)->rq.len_ena_mask = prefix##_ARQLEN_ARQENABLE_M;	\
+	(qinfo)->rq.len_crit_mask = prefix##_ARQLEN_ARQCRIT_M;	\
+	(qinfo)->rq.head_mask = prefix##_ARQH_ARQH_M;		\
+} while (0)
+
+/**
+ * ice_adminq_init_regs - Initialize AdminQ registers
+ * @hw: pointer to the hardware structure
+ *
+ * This assumes the alloc_sq and alloc_rq functions have already been called
+ */
+static void ice_adminq_init_regs(struct ice_hw *hw)
+{
+	struct ice_ctl_q_info *cq = &hw->adminq;
+
+	ICE_CQ_INIT_REGS(cq, PF_FW);
+}
+
+/**
+ * ice_mailbox_init_regs - Initialize Mailbox registers
+ * @hw: pointer to the hardware structure
+ *
+ * This assumes the alloc_sq and alloc_rq functions have already been called
+ */
+static void ice_mailbox_init_regs(struct ice_hw *hw)
+{
+	struct ice_ctl_q_info *cq = &hw->mailboxq;
+
+	ICE_CQ_INIT_REGS(cq, PF_MBX);
+}
+
+/**
+ * ice_check_sq_alive
+ * @hw: pointer to the HW struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Returns true if Queue is enabled else false.
+ */
+bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	/* check both queue-length and queue-enable fields */
+	if (cq->sq.len && cq->sq.len_mask && cq->sq.len_ena_mask)
+		return (rd32(hw, cq->sq.len) & (cq->sq.len_mask |
+						cq->sq.len_ena_mask)) ==
+			(cq->num_sq_entries | cq->sq.len_ena_mask);
+
+	return false;
+}
+
+/**
+ * ice_alloc_ctrlq_sq_ring - Allocate Control Transmit Queue (ATQ) rings
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static enum ice_status
+ice_alloc_ctrlq_sq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	size_t size = cq->num_sq_entries * sizeof(struct ice_aq_desc);
+
+	cq->sq.desc_buf.va = dmam_alloc_coherent(ice_hw_to_dev(hw), size,
+						 &cq->sq.desc_buf.pa,
+						 GFP_KERNEL | __GFP_ZERO);
+	if (!cq->sq.desc_buf.va)
+		return ICE_ERR_NO_MEMORY;
+	cq->sq.desc_buf.size = size;
+
+	cq->sq.cmd_buf = devm_kcalloc(ice_hw_to_dev(hw), cq->num_sq_entries,
+				      sizeof(struct ice_sq_cd), GFP_KERNEL);
+	if (!cq->sq.cmd_buf) {
+		dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.desc_buf.size,
+				   cq->sq.desc_buf.va, cq->sq.desc_buf.pa);
+		cq->sq.desc_buf.va = NULL;
+		cq->sq.desc_buf.pa = 0;
+		cq->sq.desc_buf.size = 0;
+		return ICE_ERR_NO_MEMORY;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_alloc_ctrlq_rq_ring - Allocate Control Receive Queue (ARQ) rings
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static enum ice_status
+ice_alloc_ctrlq_rq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	size_t size = cq->num_rq_entries * sizeof(struct ice_aq_desc);
+
+	cq->rq.desc_buf.va = dmam_alloc_coherent(ice_hw_to_dev(hw), size,
+						 &cq->rq.desc_buf.pa,
+						 GFP_KERNEL | __GFP_ZERO);
+	if (!cq->rq.desc_buf.va)
+		return ICE_ERR_NO_MEMORY;
+	cq->rq.desc_buf.size = size;
+	return 0;
+}
+
+/**
+ * ice_free_cq_ring - Free control queue ring
+ * @hw: pointer to the hardware structure
+ * @ring: pointer to the specific control queue ring
+ *
+ * This assumes the posted buffers have already been cleaned
+ * and de-allocated
+ */
+static void ice_free_cq_ring(struct ice_hw *hw, struct ice_ctl_q_ring *ring)
+{
+	dmam_free_coherent(ice_hw_to_dev(hw), ring->desc_buf.size,
+			   ring->desc_buf.va, ring->desc_buf.pa);
+	ring->desc_buf.va = NULL;
+	ring->desc_buf.pa = 0;
+	ring->desc_buf.size = 0;
+}
+
+/**
+ * ice_alloc_rq_bufs - Allocate pre-posted buffers for the ARQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static enum ice_status
+ice_alloc_rq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	int i;
+
+	/* We'll be allocating the buffer info memory first, then we can
+	 * allocate the mapped buffers for the event processing
+	 */
+	cq->rq.dma_head = devm_kcalloc(ice_hw_to_dev(hw), cq->num_rq_entries,
+				       sizeof(cq->rq.desc_buf), GFP_KERNEL);
+	if (!cq->rq.dma_head)
+		return ICE_ERR_NO_MEMORY;
+	cq->rq.r.rq_bi = (struct ice_dma_mem *)cq->rq.dma_head;
+
+	/* allocate the mapped buffers */
+	for (i = 0; i < cq->num_rq_entries; i++) {
+		struct ice_aq_desc *desc;
+		struct ice_dma_mem *bi;
+
+		bi = &cq->rq.r.rq_bi[i];
+		bi->va = dmam_alloc_coherent(ice_hw_to_dev(hw),
+					     cq->rq_buf_size, &bi->pa,
+					     GFP_KERNEL | __GFP_ZERO);
+		if (!bi->va)
+			goto unwind_alloc_rq_bufs;
+		bi->size = cq->rq_buf_size;
+
+		/* now configure the descriptors for use */
+		desc = ICE_CTL_Q_DESC(cq->rq, i);
+
+		desc->flags = cpu_to_le16(ICE_AQ_FLAG_BUF);
+		if (cq->rq_buf_size > ICE_AQ_LG_BUF)
+			desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
+		desc->opcode = 0;
+		/* This is in accordance with Admin queue design, there is no
+		 * register for buffer size configuration
+		 */
+		desc->datalen = cpu_to_le16(bi->size);
+		desc->retval = 0;
+		desc->cookie_high = 0;
+		desc->cookie_low = 0;
+		desc->params.generic.addr_high =
+			cpu_to_le32(upper_32_bits(bi->pa));
+		desc->params.generic.addr_low =
+			cpu_to_le32(lower_32_bits(bi->pa));
+		desc->params.generic.param0 = 0;
+		desc->params.generic.param1 = 0;
+	}
+	return 0;
+
+unwind_alloc_rq_bufs:
+	/* don't try to free the one that failed... */
+	i--;
+	for (; i >= 0; i--) {
+		dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.r.rq_bi[i].size,
+				   cq->rq.r.rq_bi[i].va, cq->rq.r.rq_bi[i].pa);
+		cq->rq.r.rq_bi[i].va = NULL;
+		cq->rq.r.rq_bi[i].pa = 0;
+		cq->rq.r.rq_bi[i].size = 0;
+	}
+	cq->rq.r.rq_bi = NULL;
+	devm_kfree(ice_hw_to_dev(hw), cq->rq.dma_head);
+	cq->rq.dma_head = NULL;
+
+	return ICE_ERR_NO_MEMORY;
+}
+
+/**
+ * ice_alloc_sq_bufs - Allocate empty buffer structs for the ATQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static enum ice_status
+ice_alloc_sq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	int i;
+
+	/* No mapped memory needed yet, just the buffer info structures */
+	cq->sq.dma_head = devm_kcalloc(ice_hw_to_dev(hw), cq->num_sq_entries,
+				       sizeof(cq->sq.desc_buf), GFP_KERNEL);
+	if (!cq->sq.dma_head)
+		return ICE_ERR_NO_MEMORY;
+	cq->sq.r.sq_bi = (struct ice_dma_mem *)cq->sq.dma_head;
+
+	/* allocate the mapped buffers */
+	for (i = 0; i < cq->num_sq_entries; i++) {
+		struct ice_dma_mem *bi;
+
+		bi = &cq->sq.r.sq_bi[i];
+		bi->va = dmam_alloc_coherent(ice_hw_to_dev(hw),
+					     cq->sq_buf_size, &bi->pa,
+					     GFP_KERNEL | __GFP_ZERO);
+		if (!bi->va)
+			goto unwind_alloc_sq_bufs;
+		bi->size = cq->sq_buf_size;
+	}
+	return 0;
+
+unwind_alloc_sq_bufs:
+	/* don't try to free the one that failed... */
+	i--;
+	for (; i >= 0; i--) {
+		dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.r.sq_bi[i].size,
+				   cq->sq.r.sq_bi[i].va, cq->sq.r.sq_bi[i].pa);
+		cq->sq.r.sq_bi[i].va = NULL;
+		cq->sq.r.sq_bi[i].pa = 0;
+		cq->sq.r.sq_bi[i].size = 0;
+	}
+	cq->sq.r.sq_bi = NULL;
+	devm_kfree(ice_hw_to_dev(hw), cq->sq.dma_head);
+	cq->sq.dma_head = NULL;
+
+	return ICE_ERR_NO_MEMORY;
+}
+
+static enum ice_status
+ice_cfg_cq_regs(struct ice_hw *hw, struct ice_ctl_q_ring *ring, u16 num_entries)
+{
+	/* Clear Head and Tail */
+	wr32(hw, ring->head, 0);
+	wr32(hw, ring->tail, 0);
+
+	/* set starting point */
+	wr32(hw, ring->len, (num_entries | ring->len_ena_mask));
+	wr32(hw, ring->bal, lower_32_bits(ring->desc_buf.pa));
+	wr32(hw, ring->bah, upper_32_bits(ring->desc_buf.pa));
+
+	/* Check one register to verify that config was applied */
+	if (rd32(hw, ring->bal) != lower_32_bits(ring->desc_buf.pa))
+		return ICE_ERR_AQ_ERROR;
+
+	return 0;
+}
+
+/**
+ * ice_cfg_sq_regs - configure Control ATQ registers
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * Configure base address and length registers for the transmit queue
+ */
+static enum ice_status
+ice_cfg_sq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	return ice_cfg_cq_regs(hw, &cq->sq, cq->num_sq_entries);
+}
+
+/**
+ * ice_cfg_rq_regs - configure Control ARQ register
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * Configure base address and length registers for the receive (event queue)
+ */
+static enum ice_status
+ice_cfg_rq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	enum ice_status status;
+
+	status = ice_cfg_cq_regs(hw, &cq->rq, cq->num_rq_entries);
+	if (status)
+		return status;
+
+	/* Update tail in the HW to post pre-allocated buffers */
+	wr32(hw, cq->rq.tail, (u32)(cq->num_rq_entries - 1));
+
+	return 0;
+}
+
+#define ICE_FREE_CQ_BUFS(hw, qi, ring)					\
+do {									\
+	/* free descriptors */						\
+	if ((qi)->ring.r.ring##_bi) {					\
+		int i;							\
+									\
+		for (i = 0; i < (qi)->num_##ring##_entries; i++)	\
+			if ((qi)->ring.r.ring##_bi[i].pa) {		\
+				dmam_free_coherent(ice_hw_to_dev(hw),	\
+					(qi)->ring.r.ring##_bi[i].size,	\
+					(qi)->ring.r.ring##_bi[i].va,	\
+					(qi)->ring.r.ring##_bi[i].pa);	\
+					(qi)->ring.r.ring##_bi[i].va = NULL;\
+					(qi)->ring.r.ring##_bi[i].pa = 0;\
+					(qi)->ring.r.ring##_bi[i].size = 0;\
+		}							\
+	}								\
+	/* free the buffer info list */					\
+	if ((qi)->ring.cmd_buf)						\
+		devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf);	\
+	/* free DMA head */						\
+	devm_kfree(ice_hw_to_dev(hw), (qi)->ring.dma_head);		\
+} while (0)
+
+/**
+ * ice_init_sq - main initialization routine for Control ATQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * This is the main initialization routine for the Control Send Queue
+ * Prior to calling this function, the driver *MUST* set the following fields
+ * in the cq->structure:
+ *     - cq->num_sq_entries
+ *     - cq->sq_buf_size
+ *
+ * Do *NOT* hold the lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ */
+static enum ice_status ice_init_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	enum ice_status ret_code;
+
+	if (cq->sq.count > 0) {
+		/* queue already initialized */
+		ret_code = ICE_ERR_NOT_READY;
+		goto init_ctrlq_exit;
+	}
+
+	/* verify input for valid configuration */
+	if (!cq->num_sq_entries || !cq->sq_buf_size) {
+		ret_code = ICE_ERR_CFG;
+		goto init_ctrlq_exit;
+	}
+
+	cq->sq.next_to_use = 0;
+	cq->sq.next_to_clean = 0;
+
+	/* allocate the ring memory */
+	ret_code = ice_alloc_ctrlq_sq_ring(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_exit;
+
+	/* allocate buffers in the rings */
+	ret_code = ice_alloc_sq_bufs(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_free_rings;
+
+	/* initialize base registers */
+	ret_code = ice_cfg_sq_regs(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_free_rings;
+
+	/* success! */
+	cq->sq.count = cq->num_sq_entries;
+	goto init_ctrlq_exit;
+
+init_ctrlq_free_rings:
+	ICE_FREE_CQ_BUFS(hw, cq, sq);
+	ice_free_cq_ring(hw, &cq->sq);
+
+init_ctrlq_exit:
+	return ret_code;
+}
+
+/**
+ * ice_init_rq - initialize ARQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * The main initialization routine for the Admin Receive (Event) Queue.
+ * Prior to calling this function, the driver *MUST* set the following fields
+ * in the cq->structure:
+ *     - cq->num_rq_entries
+ *     - cq->rq_buf_size
+ *
+ * Do *NOT* hold the lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ */
+static enum ice_status ice_init_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	enum ice_status ret_code;
+
+	if (cq->rq.count > 0) {
+		/* queue already initialized */
+		ret_code = ICE_ERR_NOT_READY;
+		goto init_ctrlq_exit;
+	}
+
+	/* verify input for valid configuration */
+	if (!cq->num_rq_entries || !cq->rq_buf_size) {
+		ret_code = ICE_ERR_CFG;
+		goto init_ctrlq_exit;
+	}
+
+	cq->rq.next_to_use = 0;
+	cq->rq.next_to_clean = 0;
+
+	/* allocate the ring memory */
+	ret_code = ice_alloc_ctrlq_rq_ring(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_exit;
+
+	/* allocate buffers in the rings */
+	ret_code = ice_alloc_rq_bufs(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_free_rings;
+
+	/* initialize base registers */
+	ret_code = ice_cfg_rq_regs(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_free_rings;
+
+	/* success! */
+	cq->rq.count = cq->num_rq_entries;
+	goto init_ctrlq_exit;
+
+init_ctrlq_free_rings:
+	ICE_FREE_CQ_BUFS(hw, cq, rq);
+	ice_free_cq_ring(hw, &cq->rq);
+
+init_ctrlq_exit:
+	return ret_code;
+}
+
+/**
+ * ice_shutdown_sq - shutdown the Control ATQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * The main shutdown routine for the Control Transmit Queue
+ */
+static enum ice_status
+ice_shutdown_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	enum ice_status ret_code = 0;
+
+	mutex_lock(&cq->sq_lock);
+
+	if (!cq->sq.count) {
+		ret_code = ICE_ERR_NOT_READY;
+		goto shutdown_sq_out;
+	}
+
+	/* Stop firmware AdminQ processing */
+	wr32(hw, cq->sq.head, 0);
+	wr32(hw, cq->sq.tail, 0);
+	wr32(hw, cq->sq.len, 0);
+	wr32(hw, cq->sq.bal, 0);
+	wr32(hw, cq->sq.bah, 0);
+
+	cq->sq.count = 0;	/* to indicate uninitialized queue */
+
+	/* free ring buffers and the ring itself */
+	ICE_FREE_CQ_BUFS(hw, cq, sq);
+	ice_free_cq_ring(hw, &cq->sq);
+
+shutdown_sq_out:
+	mutex_unlock(&cq->sq_lock);
+	return ret_code;
+}
+
+/**
+ * ice_aq_ver_check - Check the reported AQ API version.
+ * @hw: pointer to the hardware structure
+ *
+ * Checks if the driver should load on a given AQ API version.
+ *
+ * Return: 'true' iff the driver should attempt to load. 'false' otherwise.
+ */
+static bool ice_aq_ver_check(struct ice_hw *hw)
+{
+	if (hw->api_maj_ver > EXP_FW_API_VER_MAJOR) {
+		/* Major API version is newer than expected, don't load */
+		dev_warn(ice_hw_to_dev(hw),
+			 "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n");
+		return false;
+	} else if (hw->api_maj_ver == EXP_FW_API_VER_MAJOR) {
+		if (hw->api_min_ver > (EXP_FW_API_VER_MINOR + 2))
+			dev_info(ice_hw_to_dev(hw),
+				 "The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n");
+		else if ((hw->api_min_ver + 2) < EXP_FW_API_VER_MINOR)
+			dev_info(ice_hw_to_dev(hw),
+				 "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n");
+	} else {
+		/* Major API version is older than expected, log a warning */
+		dev_info(ice_hw_to_dev(hw),
+			 "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n");
+	}
+	return true;
+}
+
+/**
+ * ice_shutdown_rq - shutdown Control ARQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * The main shutdown routine for the Control Receive Queue
+ */
+static enum ice_status
+ice_shutdown_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	enum ice_status ret_code = 0;
+
+	mutex_lock(&cq->rq_lock);
+
+	if (!cq->rq.count) {
+		ret_code = ICE_ERR_NOT_READY;
+		goto shutdown_rq_out;
+	}
+
+	/* Stop Control Queue processing */
+	wr32(hw, cq->rq.head, 0);
+	wr32(hw, cq->rq.tail, 0);
+	wr32(hw, cq->rq.len, 0);
+	wr32(hw, cq->rq.bal, 0);
+	wr32(hw, cq->rq.bah, 0);
+
+	/* set rq.count to 0 to indicate uninitialized queue */
+	cq->rq.count = 0;
+
+	/* free ring buffers and the ring itself */
+	ICE_FREE_CQ_BUFS(hw, cq, rq);
+	ice_free_cq_ring(hw, &cq->rq);
+
+shutdown_rq_out:
+	mutex_unlock(&cq->rq_lock);
+	return ret_code;
+}
+
+/**
+ * ice_init_check_adminq - Check version for Admin Queue to know if its alive
+ * @hw: pointer to the hardware structure
+ */
+static enum ice_status ice_init_check_adminq(struct ice_hw *hw)
+{
+	struct ice_ctl_q_info *cq = &hw->adminq;
+	enum ice_status status;
+
+	status = ice_aq_get_fw_ver(hw, NULL);
+	if (status)
+		goto init_ctrlq_free_rq;
+
+	if (!ice_aq_ver_check(hw)) {
+		status = ICE_ERR_FW_API_VER;
+		goto init_ctrlq_free_rq;
+	}
+
+	return 0;
+
+init_ctrlq_free_rq:
+	ice_shutdown_rq(hw, cq);
+	ice_shutdown_sq(hw, cq);
+	return status;
+}
+
+/**
+ * ice_init_ctrlq - main initialization routine for any control Queue
+ * @hw: pointer to the hardware structure
+ * @q_type: specific Control queue type
+ *
+ * Prior to calling this function, the driver *MUST* set the following fields
+ * in the cq->structure:
+ *     - cq->num_sq_entries
+ *     - cq->num_rq_entries
+ *     - cq->rq_buf_size
+ *     - cq->sq_buf_size
+ *
+ * NOTE: this function does not initialize the controlq locks
+ */
+static enum ice_status ice_init_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
+{
+	struct ice_ctl_q_info *cq;
+	enum ice_status ret_code;
+
+	switch (q_type) {
+	case ICE_CTL_Q_ADMIN:
+		ice_adminq_init_regs(hw);
+		cq = &hw->adminq;
+		break;
+	case ICE_CTL_Q_MAILBOX:
+		ice_mailbox_init_regs(hw);
+		cq = &hw->mailboxq;
+		break;
+	default:
+		return ICE_ERR_PARAM;
+	}
+	cq->qtype = q_type;
+
+	/* verify input for valid configuration */
+	if (!cq->num_rq_entries || !cq->num_sq_entries ||
+	    !cq->rq_buf_size || !cq->sq_buf_size) {
+		return ICE_ERR_CFG;
+	}
+
+	/* setup SQ command write back timeout */
+	cq->sq_cmd_timeout = ICE_CTL_Q_SQ_CMD_TIMEOUT;
+
+	/* allocate the ATQ */
+	ret_code = ice_init_sq(hw, cq);
+	if (ret_code)
+		return ret_code;
+
+	/* allocate the ARQ */
+	ret_code = ice_init_rq(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_free_sq;
+
+	/* success! */
+	return 0;
+
+init_ctrlq_free_sq:
+	ice_shutdown_sq(hw, cq);
+	return ret_code;
+}
+
+/**
+ * ice_shutdown_ctrlq - shutdown routine for any control queue
+ * @hw: pointer to the hardware structure
+ * @q_type: specific Control queue type
+ *
+ * NOTE: this function does not destroy the control queue locks.
+ */
+static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
+{
+	struct ice_ctl_q_info *cq;
+
+	switch (q_type) {
+	case ICE_CTL_Q_ADMIN:
+		cq = &hw->adminq;
+		if (ice_check_sq_alive(hw, cq))
+			ice_aq_q_shutdown(hw, true);
+		break;
+	case ICE_CTL_Q_MAILBOX:
+		cq = &hw->mailboxq;
+		break;
+	default:
+		return;
+	}
+
+	ice_shutdown_sq(hw, cq);
+	ice_shutdown_rq(hw, cq);
+}
+
+/**
+ * ice_shutdown_all_ctrlq - shutdown routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * NOTE: this function does not destroy the control queue locks. The driver
+ * may call this at runtime to shutdown and later restart control queues, such
+ * as in response to a reset event.
+ */
+void ice_shutdown_all_ctrlq(struct ice_hw *hw)
+{
+	/* Shutdown FW admin queue */
+	ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+	/* Shutdown PF-VF Mailbox */
+	ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX);
+}
+
+/**
+ * ice_init_all_ctrlq - main initialization routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * Prior to calling this function, the driver MUST* set the following fields
+ * in the cq->structure for all control queues:
+ *     - cq->num_sq_entries
+ *     - cq->num_rq_entries
+ *     - cq->rq_buf_size
+ *     - cq->sq_buf_size
+ *
+ * NOTE: this function does not initialize the controlq locks.
+ */
+enum ice_status ice_init_all_ctrlq(struct ice_hw *hw)
+{
+	enum ice_status status;
+	u32 retry = 0;
+
+	/* Init FW admin queue */
+	do {
+		status = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN);
+		if (status)
+			return status;
+
+		status = ice_init_check_adminq(hw);
+		if (status != ICE_ERR_AQ_FW_CRITICAL)
+			break;
+
+		ice_debug(hw, ICE_DBG_AQ_MSG,
+			  "Retry Admin Queue init due to FW critical error\n");
+		ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+		msleep(ICE_CTL_Q_ADMIN_INIT_MSEC);
+	} while (retry++ < ICE_CTL_Q_ADMIN_INIT_TIMEOUT);
+
+	if (status)
+		return status;
+	/* Init Mailbox queue */
+	return ice_init_ctrlq(hw, ICE_CTL_Q_MAILBOX);
+}
+
+/**
+ * ice_init_ctrlq_locks - Initialize locks for a control queue
+ * @cq: pointer to the control queue
+ *
+ * Initializes the send and receive queue locks for a given control queue.
+ */
+static void ice_init_ctrlq_locks(struct ice_ctl_q_info *cq)
+{
+	mutex_init(&cq->sq_lock);
+	mutex_init(&cq->rq_lock);
+}
+
+/**
+ * ice_create_all_ctrlq - main initialization routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * Prior to calling this function, the driver *MUST* set the following fields
+ * in the cq->structure for all control queues:
+ *     - cq->num_sq_entries
+ *     - cq->num_rq_entries
+ *     - cq->rq_buf_size
+ *     - cq->sq_buf_size
+ *
+ * This function creates all the control queue locks and then calls
+ * ice_init_all_ctrlq. It should be called once during driver load. If the
+ * driver needs to re-initialize control queues at run time it should call
+ * ice_init_all_ctrlq instead.
+ */
+enum ice_status ice_create_all_ctrlq(struct ice_hw *hw)
+{
+	ice_init_ctrlq_locks(&hw->adminq);
+	ice_init_ctrlq_locks(&hw->mailboxq);
+
+	return ice_init_all_ctrlq(hw);
+}
+
+/**
+ * ice_destroy_ctrlq_locks - Destroy locks for a control queue
+ * @cq: pointer to the control queue
+ *
+ * Destroys the send and receive queue locks for a given control queue.
+ */
+static void ice_destroy_ctrlq_locks(struct ice_ctl_q_info *cq)
+{
+	mutex_destroy(&cq->sq_lock);
+	mutex_destroy(&cq->rq_lock);
+}
+
+/**
+ * ice_destroy_all_ctrlq - exit routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * This function shuts down all the control queues and then destroys the
+ * control queue locks. It should be called once during driver unload. The
+ * driver should call ice_shutdown_all_ctrlq if it needs to shut down and
+ * reinitialize control queues, such as in response to a reset event.
+ */
+void ice_destroy_all_ctrlq(struct ice_hw *hw)
+{
+	/* shut down all the control queues first */
+	ice_shutdown_all_ctrlq(hw);
+
+	ice_destroy_ctrlq_locks(&hw->adminq);
+	ice_destroy_ctrlq_locks(&hw->mailboxq);
+}
+
+/**
+ * ice_clean_sq - cleans Admin send queue (ATQ)
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * returns the number of free desc
+ */
+static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	struct ice_ctl_q_ring *sq = &cq->sq;
+	u16 ntc = sq->next_to_clean;
+	struct ice_sq_cd *details;
+	struct ice_aq_desc *desc;
+
+	desc = ICE_CTL_Q_DESC(*sq, ntc);
+	details = ICE_CTL_Q_DETAILS(*sq, ntc);
+
+	while (rd32(hw, cq->sq.head) != ntc) {
+		ice_debug(hw, ICE_DBG_AQ_MSG,
+			  "ntc %d head %d.\n", ntc, rd32(hw, cq->sq.head));
+		memset(desc, 0, sizeof(*desc));
+		memset(details, 0, sizeof(*details));
+		ntc++;
+		if (ntc == sq->count)
+			ntc = 0;
+		desc = ICE_CTL_Q_DESC(*sq, ntc);
+		details = ICE_CTL_Q_DETAILS(*sq, ntc);
+	}
+
+	sq->next_to_clean = ntc;
+
+	return ICE_CTL_Q_DESC_UNUSED(sq);
+}
+
+/**
+ * ice_debug_cq
+ * @hw: pointer to the hardware structure
+ * @desc: pointer to control queue descriptor
+ * @buf: pointer to command buffer
+ * @buf_len: max length of buf
+ *
+ * Dumps debug log about control command with descriptor contents.
+ */
+static void ice_debug_cq(struct ice_hw *hw, void *desc, void *buf, u16 buf_len)
+{
+	struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc;
+	u16 len;
+
+	if (!IS_ENABLED(CONFIG_DYNAMIC_DEBUG) &&
+	    !((ICE_DBG_AQ_DESC | ICE_DBG_AQ_DESC_BUF) & hw->debug_mask))
+		return;
+
+	if (!desc)
+		return;
+
+	len = le16_to_cpu(cq_desc->datalen);
+
+	ice_debug(hw, ICE_DBG_AQ_DESC,
+		  "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
+		  le16_to_cpu(cq_desc->opcode),
+		  le16_to_cpu(cq_desc->flags),
+		  le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval));
+	ice_debug(hw, ICE_DBG_AQ_DESC, "\tcookie (h,l) 0x%08X 0x%08X\n",
+		  le32_to_cpu(cq_desc->cookie_high),
+		  le32_to_cpu(cq_desc->cookie_low));
+	ice_debug(hw, ICE_DBG_AQ_DESC, "\tparam (0,1)  0x%08X 0x%08X\n",
+		  le32_to_cpu(cq_desc->params.generic.param0),
+		  le32_to_cpu(cq_desc->params.generic.param1));
+	ice_debug(hw, ICE_DBG_AQ_DESC, "\taddr (h,l)   0x%08X 0x%08X\n",
+		  le32_to_cpu(cq_desc->params.generic.addr_high),
+		  le32_to_cpu(cq_desc->params.generic.addr_low));
+	if (buf && cq_desc->datalen != 0) {
+		ice_debug(hw, ICE_DBG_AQ_DESC_BUF, "Buffer:\n");
+		if (buf_len < len)
+			len = buf_len;
+
+		ice_debug_array(hw, ICE_DBG_AQ_DESC_BUF, 16, 1, (u8 *)buf, len);
+	}
+}
+
+/**
+ * ice_sq_done - check if FW has processed the Admin Send Queue (ATQ)
+ * @hw: pointer to the HW struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Returns true if the firmware has processed all descriptors on the
+ * admin send queue. Returns false if there are still requests pending.
+ */
+static bool ice_sq_done(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	/* AQ designers suggest use of head for better
+	 * timing reliability than DD bit
+	 */
+	return rd32(hw, cq->sq.head) == cq->sq.next_to_use;
+}
+
+/**
+ * ice_sq_send_cmd - send command to Control Queue (ATQ)
+ * @hw: pointer to the HW struct
+ * @cq: pointer to the specific Control queue
+ * @desc: prefilled descriptor describing the command (non DMA mem)
+ * @buf: buffer to use for indirect commands (or NULL for direct commands)
+ * @buf_size: size of buffer for indirect commands (or 0 for direct commands)
+ * @cd: pointer to command details structure
+ *
+ * This is the main send command routine for the ATQ. It runs the queue,
+ * cleans the queue, etc.
+ */
+enum ice_status
+ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+		struct ice_aq_desc *desc, void *buf, u16 buf_size,
+		struct ice_sq_cd *cd)
+{
+	struct ice_dma_mem *dma_buf = NULL;
+	struct ice_aq_desc *desc_on_ring;
+	bool cmd_completed = false;
+	enum ice_status status = 0;
+	struct ice_sq_cd *details;
+	u32 total_delay = 0;
+	u16 retval = 0;
+	u32 val = 0;
+
+	/* if reset is in progress return a soft error */
+	if (hw->reset_ongoing)
+		return ICE_ERR_RESET_ONGOING;
+	mutex_lock(&cq->sq_lock);
+
+	cq->sq_last_status = ICE_AQ_RC_OK;
+
+	if (!cq->sq.count) {
+		ice_debug(hw, ICE_DBG_AQ_MSG,
+			  "Control Send queue not initialized.\n");
+		status = ICE_ERR_AQ_EMPTY;
+		goto sq_send_command_error;
+	}
+
+	if ((buf && !buf_size) || (!buf && buf_size)) {
+		status = ICE_ERR_PARAM;
+		goto sq_send_command_error;
+	}
+
+	if (buf) {
+		if (buf_size > cq->sq_buf_size) {
+			ice_debug(hw, ICE_DBG_AQ_MSG,
+				  "Invalid buffer size for Control Send queue: %d.\n",
+				  buf_size);
+			status = ICE_ERR_INVAL_SIZE;
+			goto sq_send_command_error;
+		}
+
+		desc->flags |= cpu_to_le16(ICE_AQ_FLAG_BUF);
+		if (buf_size > ICE_AQ_LG_BUF)
+			desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
+	}
+
+	val = rd32(hw, cq->sq.head);
+	if (val >= cq->num_sq_entries) {
+		ice_debug(hw, ICE_DBG_AQ_MSG,
+			  "head overrun at %d in the Control Send Queue ring\n",
+			  val);
+		status = ICE_ERR_AQ_EMPTY;
+		goto sq_send_command_error;
+	}
+
+	details = ICE_CTL_Q_DETAILS(cq->sq, cq->sq.next_to_use);
+	if (cd)
+		*details = *cd;
+	else
+		memset(details, 0, sizeof(*details));
+
+	/* Call clean and check queue available function to reclaim the
+	 * descriptors that were processed by FW/MBX; the function returns the
+	 * number of desc available. The clean function called here could be
+	 * called in a separate thread in case of asynchronous completions.
+	 */
+	if (ice_clean_sq(hw, cq) == 0) {
+		ice_debug(hw, ICE_DBG_AQ_MSG,
+			  "Error: Control Send Queue is full.\n");
+		status = ICE_ERR_AQ_FULL;
+		goto sq_send_command_error;
+	}
+
+	/* initialize the temp desc pointer with the right desc */
+	desc_on_ring = ICE_CTL_Q_DESC(cq->sq, cq->sq.next_to_use);
+
+	/* if the desc is available copy the temp desc to the right place */
+	memcpy(desc_on_ring, desc, sizeof(*desc_on_ring));
+
+	/* if buf is not NULL assume indirect command */
+	if (buf) {
+		dma_buf = &cq->sq.r.sq_bi[cq->sq.next_to_use];
+		/* copy the user buf into the respective DMA buf */
+		memcpy(dma_buf->va, buf, buf_size);
+		desc_on_ring->datalen = cpu_to_le16(buf_size);
+
+		/* Update the address values in the desc with the pa value
+		 * for respective buffer
+		 */
+		desc_on_ring->params.generic.addr_high =
+			cpu_to_le32(upper_32_bits(dma_buf->pa));
+		desc_on_ring->params.generic.addr_low =
+			cpu_to_le32(lower_32_bits(dma_buf->pa));
+	}
+
+	/* Debug desc and buffer */
+	ice_debug(hw, ICE_DBG_AQ_DESC,
+		  "ATQ: Control Send queue desc and buffer:\n");
+
+	ice_debug_cq(hw, (void *)desc_on_ring, buf, buf_size);
+
+	(cq->sq.next_to_use)++;
+	if (cq->sq.next_to_use == cq->sq.count)
+		cq->sq.next_to_use = 0;
+	wr32(hw, cq->sq.tail, cq->sq.next_to_use);
+
+	do {
+		if (ice_sq_done(hw, cq))
+			break;
+
+		udelay(ICE_CTL_Q_SQ_CMD_USEC);
+		total_delay++;
+	} while (total_delay < cq->sq_cmd_timeout);
+
+	/* if ready, copy the desc back to temp */
+	if (ice_sq_done(hw, cq)) {
+		memcpy(desc, desc_on_ring, sizeof(*desc));
+		if (buf) {
+			/* get returned length to copy */
+			u16 copy_size = le16_to_cpu(desc->datalen);
+
+			if (copy_size > buf_size) {
+				ice_debug(hw, ICE_DBG_AQ_MSG,
+					  "Return len %d > than buf len %d\n",
+					  copy_size, buf_size);
+				status = ICE_ERR_AQ_ERROR;
+			} else {
+				memcpy(buf, dma_buf->va, copy_size);
+			}
+		}
+		retval = le16_to_cpu(desc->retval);
+		if (retval) {
+			ice_debug(hw, ICE_DBG_AQ_MSG,
+				  "Control Send Queue command 0x%04X completed with error 0x%X\n",
+				  le16_to_cpu(desc->opcode),
+				  retval);
+
+			/* strip off FW internal code */
+			retval &= 0xff;
+		}
+		cmd_completed = true;
+		if (!status && retval != ICE_AQ_RC_OK)
+			status = ICE_ERR_AQ_ERROR;
+		cq->sq_last_status = (enum ice_aq_err)retval;
+	}
+
+	ice_debug(hw, ICE_DBG_AQ_MSG,
+		  "ATQ: desc and buffer writeback:\n");
+
+	ice_debug_cq(hw, (void *)desc, buf, buf_size);
+
+	/* save writeback AQ if requested */
+	if (details->wb_desc)
+		memcpy(details->wb_desc, desc_on_ring,
+		       sizeof(*details->wb_desc));
+
+	/* update the error if time out occurred */
+	if (!cmd_completed) {
+		if (rd32(hw, cq->rq.len) & cq->rq.len_crit_mask ||
+		    rd32(hw, cq->sq.len) & cq->sq.len_crit_mask) {
+			ice_debug(hw, ICE_DBG_AQ_MSG, "Critical FW error.\n");
+			status = ICE_ERR_AQ_FW_CRITICAL;
+		} else {
+			ice_debug(hw, ICE_DBG_AQ_MSG,
+				  "Control Send Queue Writeback timeout.\n");
+			status = ICE_ERR_AQ_TIMEOUT;
+		}
+	}
+
+sq_send_command_error:
+	mutex_unlock(&cq->sq_lock);
+	return status;
+}
+
+/**
+ * ice_fill_dflt_direct_cmd_desc - AQ descriptor helper function
+ * @desc: pointer to the temp descriptor (non DMA mem)
+ * @opcode: the opcode can be used to decide which flags to turn off or on
+ *
+ * Fill the desc with default values
+ */
+void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode)
+{
+	/* zero out the desc */
+	memset(desc, 0, sizeof(*desc));
+	desc->opcode = cpu_to_le16(opcode);
+	desc->flags = cpu_to_le16(ICE_AQ_FLAG_SI);
+}
+
+/**
+ * ice_clean_rq_elem
+ * @hw: pointer to the HW struct
+ * @cq: pointer to the specific Control queue
+ * @e: event info from the receive descriptor, includes any buffers
+ * @pending: number of events that could be left to process
+ *
+ * This function cleans one Admin Receive Queue element and returns
+ * the contents through e. It can also return how many events are
+ * left to process through 'pending'.
+ */
+enum ice_status
+ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+		  struct ice_rq_event_info *e, u16 *pending)
+{
+	u16 ntc = cq->rq.next_to_clean;
+	enum ice_status ret_code = 0;
+	struct ice_aq_desc *desc;
+	struct ice_dma_mem *bi;
+	u16 desc_idx;
+	u16 datalen;
+	u16 flags;
+	u16 ntu;
+
+	/* pre-clean the event info */
+	memset(&e->desc, 0, sizeof(e->desc));
+
+	/* take the lock before we start messing with the ring */
+	mutex_lock(&cq->rq_lock);
+
+	if (!cq->rq.count) {
+		ice_debug(hw, ICE_DBG_AQ_MSG,
+			  "Control Receive queue not initialized.\n");
+		ret_code = ICE_ERR_AQ_EMPTY;
+		goto clean_rq_elem_err;
+	}
+
+	/* set next_to_use to head */
+	ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
+
+	if (ntu == ntc) {
+		/* nothing to do - shouldn't need to update ring's values */
+		ret_code = ICE_ERR_AQ_NO_WORK;
+		goto clean_rq_elem_out;
+	}
+
+	/* now clean the next descriptor */
+	desc = ICE_CTL_Q_DESC(cq->rq, ntc);
+	desc_idx = ntc;
+
+	cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval);
+	flags = le16_to_cpu(desc->flags);
+	if (flags & ICE_AQ_FLAG_ERR) {
+		ret_code = ICE_ERR_AQ_ERROR;
+		ice_debug(hw, ICE_DBG_AQ_MSG,
+			  "Control Receive Queue Event 0x%04X received with error 0x%X\n",
+			  le16_to_cpu(desc->opcode),
+			  cq->rq_last_status);
+	}
+	memcpy(&e->desc, desc, sizeof(e->desc));
+	datalen = le16_to_cpu(desc->datalen);
+	e->msg_len = min_t(u16, datalen, e->buf_len);
+	if (e->msg_buf && e->msg_len)
+		memcpy(e->msg_buf, cq->rq.r.rq_bi[desc_idx].va, e->msg_len);
+
+	ice_debug(hw, ICE_DBG_AQ_DESC, "ARQ: desc and buffer:\n");
+
+	ice_debug_cq(hw, (void *)desc, e->msg_buf, cq->rq_buf_size);
+
+	/* Restore the original datalen and buffer address in the desc,
+	 * FW updates datalen to indicate the event message size
+	 */
+	bi = &cq->rq.r.rq_bi[ntc];
+	memset(desc, 0, sizeof(*desc));
+
+	desc->flags = cpu_to_le16(ICE_AQ_FLAG_BUF);
+	if (cq->rq_buf_size > ICE_AQ_LG_BUF)
+		desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
+	desc->datalen = cpu_to_le16(bi->size);
+	desc->params.generic.addr_high = cpu_to_le32(upper_32_bits(bi->pa));
+	desc->params.generic.addr_low = cpu_to_le32(lower_32_bits(bi->pa));
+
+	/* set tail = the last cleaned desc index. */
+	wr32(hw, cq->rq.tail, ntc);
+	/* ntc is updated to tail + 1 */
+	ntc++;
+	if (ntc == cq->num_rq_entries)
+		ntc = 0;
+	cq->rq.next_to_clean = ntc;
+	cq->rq.next_to_use = ntu;
+
+clean_rq_elem_out:
+	/* Set pending if needed, unlock and return */
+	if (pending) {
+		/* re-read HW head to calculate actual pending messages */
+		ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
+		*pending = (u16)((ntc > ntu ? cq->rq.count : 0) + (ntu - ntc));
+	}
+clean_rq_elem_err:
+	mutex_unlock(&cq->rq_lock);
+
+	return ret_code;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
new file mode 100644
index 000000000..68866f4f0
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_CONTROLQ_H_
+#define _ICE_CONTROLQ_H_
+
+#include "ice_adminq_cmd.h"
+
+/* Maximum buffer lengths for all control queue types */
+#define ICE_AQ_MAX_BUF_LEN 4096
+#define ICE_MBXQ_MAX_BUF_LEN 4096
+
+#define ICE_CTL_Q_DESC(R, i) \
+	(&(((struct ice_aq_desc *)((R).desc_buf.va))[i]))
+
+#define ICE_CTL_Q_DESC_UNUSED(R) \
+	(u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+	      (R)->next_to_clean - (R)->next_to_use - 1)
+
+/* Defines that help manage the driver vs FW API checks.
+ * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage.
+ */
+#define EXP_FW_API_VER_BRANCH		0x00
+#define EXP_FW_API_VER_MAJOR		0x01
+#define EXP_FW_API_VER_MINOR		0x05
+
+/* Different control queue types: These are mainly for SW consumption. */
+enum ice_ctl_q {
+	ICE_CTL_Q_UNKNOWN = 0,
+	ICE_CTL_Q_ADMIN,
+	ICE_CTL_Q_MAILBOX,
+};
+
+/* Control Queue timeout settings - max delay 1s */
+#define ICE_CTL_Q_SQ_CMD_TIMEOUT	10000 /* Count 10000 times */
+#define ICE_CTL_Q_SQ_CMD_USEC		100   /* Check every 100usec */
+#define ICE_CTL_Q_ADMIN_INIT_TIMEOUT	10    /* Count 10 times */
+#define ICE_CTL_Q_ADMIN_INIT_MSEC	100   /* Check every 100msec */
+
+struct ice_ctl_q_ring {
+	void *dma_head;			/* Virtual address to DMA head */
+	struct ice_dma_mem desc_buf;	/* descriptor ring memory */
+	void *cmd_buf;			/* command buffer memory */
+
+	union {
+		struct ice_dma_mem *sq_bi;
+		struct ice_dma_mem *rq_bi;
+	} r;
+
+	u16 count;		/* Number of descriptors */
+
+	/* used for interrupt processing */
+	u16 next_to_use;
+	u16 next_to_clean;
+
+	/* used for queue tracking */
+	u32 head;
+	u32 tail;
+	u32 len;
+	u32 bah;
+	u32 bal;
+	u32 len_mask;
+	u32 len_ena_mask;
+	u32 len_crit_mask;
+	u32 head_mask;
+};
+
+/* sq transaction details */
+struct ice_sq_cd {
+	struct ice_aq_desc *wb_desc;
+};
+
+#define ICE_CTL_Q_DETAILS(R, i) (&(((struct ice_sq_cd *)((R).cmd_buf))[i]))
+
+/* rq event information */
+struct ice_rq_event_info {
+	struct ice_aq_desc desc;
+	u16 msg_len;
+	u16 buf_len;
+	u8 *msg_buf;
+};
+
+/* Control Queue information */
+struct ice_ctl_q_info {
+	enum ice_ctl_q qtype;
+	enum ice_aq_err rq_last_status;	/* last status on receive queue */
+	struct ice_ctl_q_ring rq;	/* receive queue */
+	struct ice_ctl_q_ring sq;	/* send queue */
+	u32 sq_cmd_timeout;		/* send queue cmd write back timeout */
+	u16 num_rq_entries;		/* receive queue depth */
+	u16 num_sq_entries;		/* send queue depth */
+	u16 rq_buf_size;		/* receive queue buffer size */
+	u16 sq_buf_size;		/* send queue buffer size */
+	enum ice_aq_err sq_last_status;	/* last status on send queue */
+	struct mutex sq_lock;		/* Send queue lock */
+	struct mutex rq_lock;		/* Receive queue lock */
+};
+
+#endif /* _ICE_CONTROLQ_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
new file mode 100644
index 000000000..28e834a12
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -0,0 +1,1428 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_sched.h"
+#include "ice_dcb.h"
+
+/**
+ * ice_aq_get_lldp_mib
+ * @hw: pointer to the HW struct
+ * @bridge_type: type of bridge requested
+ * @mib_type: Local, Remote or both Local and Remote MIBs
+ * @buf: pointer to the caller-supplied buffer to store the MIB block
+ * @buf_size: size of the buffer (in bytes)
+ * @local_len: length of the returned Local LLDP MIB
+ * @remote_len: length of the returned Remote LLDP MIB
+ * @cd: pointer to command details structure or NULL
+ *
+ * Requests the complete LLDP MIB (entire packet). (0x0A00)
+ */
+static enum ice_status
+ice_aq_get_lldp_mib(struct ice_hw *hw, u8 bridge_type, u8 mib_type, void *buf,
+		    u16 buf_size, u16 *local_len, u16 *remote_len,
+		    struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_get_mib *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.lldp_get_mib;
+
+	if (buf_size == 0 || !buf)
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_get_mib);
+
+	cmd->type = mib_type & ICE_AQ_LLDP_MIB_TYPE_M;
+	cmd->type |= (bridge_type << ICE_AQ_LLDP_BRID_TYPE_S) &
+		ICE_AQ_LLDP_BRID_TYPE_M;
+
+	desc.datalen = cpu_to_le16(buf_size);
+
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status) {
+		if (local_len)
+			*local_len = le16_to_cpu(cmd->local_len);
+		if (remote_len)
+			*remote_len = le16_to_cpu(cmd->remote_len);
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_cfg_lldp_mib_change
+ * @hw: pointer to the HW struct
+ * @ena_update: Enable or Disable event posting
+ * @cd: pointer to command details structure or NULL
+ *
+ * Enable or Disable posting of an event on ARQ when LLDP MIB
+ * associated with the interface changes (0x0A01)
+ */
+static enum ice_status
+ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update,
+			   struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_set_mib_change *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.lldp_set_event;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_set_mib_change);
+
+	if (!ena_update)
+		cmd->command |= ICE_AQ_LLDP_MIB_UPDATE_DIS;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_stop_lldp
+ * @hw: pointer to the HW struct
+ * @shutdown_lldp_agent: True if LLDP Agent needs to be Shutdown
+ *			 False if LLDP Agent needs to be Stopped
+ * @persist: True if Stop/Shutdown of LLDP Agent needs to be persistent across
+ *	     reboots
+ * @cd: pointer to command details structure or NULL
+ *
+ * Stop or Shutdown the embedded LLDP Agent (0x0A05)
+ */
+enum ice_status
+ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, bool persist,
+		 struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_stop *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.lldp_stop;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_stop);
+
+	if (shutdown_lldp_agent)
+		cmd->command |= ICE_AQ_LLDP_AGENT_SHUTDOWN;
+
+	if (persist)
+		cmd->command |= ICE_AQ_LLDP_AGENT_PERSIST_DIS;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_start_lldp
+ * @hw: pointer to the HW struct
+ * @persist: True if Start of LLDP Agent needs to be persistent across reboots
+ * @cd: pointer to command details structure or NULL
+ *
+ * Start the embedded LLDP Agent on all ports. (0x0A06)
+ */
+enum ice_status
+ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_start *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.lldp_start;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_start);
+
+	cmd->command = ICE_AQ_LLDP_AGENT_START;
+
+	if (persist)
+		cmd->command |= ICE_AQ_LLDP_AGENT_PERSIST_ENA;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_get_dcbx_status
+ * @hw: pointer to the HW struct
+ *
+ * Get the DCBX status from the Firmware
+ */
+static u8 ice_get_dcbx_status(struct ice_hw *hw)
+{
+	u32 reg;
+
+	reg = rd32(hw, PRTDCB_GENS);
+	return (u8)((reg & PRTDCB_GENS_DCBX_STATUS_M) >>
+		    PRTDCB_GENS_DCBX_STATUS_S);
+}
+
+/**
+ * ice_parse_ieee_ets_common_tlv
+ * @buf: Data buffer to be parsed for ETS CFG/REC data
+ * @ets_cfg: Container to store parsed data
+ *
+ * Parses the common data of IEEE 802.1Qaz ETS CFG/REC TLV
+ */
+static void
+ice_parse_ieee_ets_common_tlv(u8 *buf, struct ice_dcb_ets_cfg *ets_cfg)
+{
+	u8 offset = 0;
+	int i;
+
+	/* Priority Assignment Table (4 octets)
+	 * Octets:|    1    |    2    |    3    |    4    |
+	 *        -----------------------------------------
+	 *        |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+	 *        -----------------------------------------
+	 *   Bits:|7  4|3  0|7  4|3  0|7  4|3  0|7  4|3  0|
+	 *        -----------------------------------------
+	 */
+	for (i = 0; i < 4; i++) {
+		ets_cfg->prio_table[i * 2] =
+			((buf[offset] & ICE_IEEE_ETS_PRIO_1_M) >>
+			 ICE_IEEE_ETS_PRIO_1_S);
+		ets_cfg->prio_table[i * 2 + 1] =
+			((buf[offset] & ICE_IEEE_ETS_PRIO_0_M) >>
+			 ICE_IEEE_ETS_PRIO_0_S);
+		offset++;
+	}
+
+	/* TC Bandwidth Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 *
+	 * TSA Assignment Table (8 octets)
+	 * Octets:| 9 | 10| 11| 12| 13| 14| 15| 16|
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 */
+	ice_for_each_traffic_class(i) {
+		ets_cfg->tcbwtable[i] = buf[offset];
+		ets_cfg->tsatable[i] = buf[ICE_MAX_TRAFFIC_CLASS + offset++];
+	}
+}
+
+/**
+ * ice_parse_ieee_etscfg_tlv
+ * @tlv: IEEE 802.1Qaz ETS CFG TLV
+ * @dcbcfg: Local store to update ETS CFG data
+ *
+ * Parses IEEE 802.1Qaz ETS CFG TLV
+ */
+static void
+ice_parse_ieee_etscfg_tlv(struct ice_lldp_org_tlv *tlv,
+			  struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *etscfg;
+	u8 *buf = tlv->tlvinfo;
+
+	/* First Octet post subtype
+	 * --------------------------
+	 * |will-|CBS  | Re-  | Max |
+	 * |ing  |     |served| TCs |
+	 * --------------------------
+	 * |1bit | 1bit|3 bits|3bits|
+	 */
+	etscfg = &dcbcfg->etscfg;
+	etscfg->willing = ((buf[0] & ICE_IEEE_ETS_WILLING_M) >>
+			   ICE_IEEE_ETS_WILLING_S);
+	etscfg->cbs = ((buf[0] & ICE_IEEE_ETS_CBS_M) >> ICE_IEEE_ETS_CBS_S);
+	etscfg->maxtcs = ((buf[0] & ICE_IEEE_ETS_MAXTC_M) >>
+			  ICE_IEEE_ETS_MAXTC_S);
+
+	/* Begin parsing at Priority Assignment Table (offset 1 in buf) */
+	ice_parse_ieee_ets_common_tlv(&buf[1], etscfg);
+}
+
+/**
+ * ice_parse_ieee_etsrec_tlv
+ * @tlv: IEEE 802.1Qaz ETS REC TLV
+ * @dcbcfg: Local store to update ETS REC data
+ *
+ * Parses IEEE 802.1Qaz ETS REC TLV
+ */
+static void
+ice_parse_ieee_etsrec_tlv(struct ice_lldp_org_tlv *tlv,
+			  struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+
+	/* Begin parsing at Priority Assignment Table (offset 1 in buf) */
+	ice_parse_ieee_ets_common_tlv(&buf[1], &dcbcfg->etsrec);
+}
+
+/**
+ * ice_parse_ieee_pfccfg_tlv
+ * @tlv: IEEE 802.1Qaz PFC CFG TLV
+ * @dcbcfg: Local store to update PFC CFG data
+ *
+ * Parses IEEE 802.1Qaz PFC CFG TLV
+ */
+static void
+ice_parse_ieee_pfccfg_tlv(struct ice_lldp_org_tlv *tlv,
+			  struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+
+	/* ----------------------------------------
+	 * |will-|MBC  | Re-  | PFC |  PFC Enable  |
+	 * |ing  |     |served| cap |              |
+	 * -----------------------------------------
+	 * |1bit | 1bit|2 bits|4bits| 1 octet      |
+	 */
+	dcbcfg->pfc.willing = ((buf[0] & ICE_IEEE_PFC_WILLING_M) >>
+			       ICE_IEEE_PFC_WILLING_S);
+	dcbcfg->pfc.mbc = ((buf[0] & ICE_IEEE_PFC_MBC_M) >> ICE_IEEE_PFC_MBC_S);
+	dcbcfg->pfc.pfccap = ((buf[0] & ICE_IEEE_PFC_CAP_M) >>
+			      ICE_IEEE_PFC_CAP_S);
+	dcbcfg->pfc.pfcena = buf[1];
+}
+
+/**
+ * ice_parse_ieee_app_tlv
+ * @tlv: IEEE 802.1Qaz APP TLV
+ * @dcbcfg: Local store to update APP PRIO data
+ *
+ * Parses IEEE 802.1Qaz APP PRIO TLV
+ */
+static void
+ice_parse_ieee_app_tlv(struct ice_lldp_org_tlv *tlv,
+		       struct ice_dcbx_cfg *dcbcfg)
+{
+	u16 offset = 0;
+	u16 typelen;
+	int i = 0;
+	u16 len;
+	u8 *buf;
+
+	typelen = ntohs(tlv->typelen);
+	len = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+	buf = tlv->tlvinfo;
+
+	/* Removing sizeof(ouisubtype) and reserved byte from len.
+	 * Remaining len div 3 is number of APP TLVs.
+	 */
+	len -= (sizeof(tlv->ouisubtype) + 1);
+
+	/* Move offset to App Priority Table */
+	offset++;
+
+	/* Application Priority Table (3 octets)
+	 * Octets:|         1          |    2    |    3    |
+	 *        -----------------------------------------
+	 *        |Priority|Rsrvd| Sel |    Protocol ID    |
+	 *        -----------------------------------------
+	 *   Bits:|23    21|20 19|18 16|15                0|
+	 *        -----------------------------------------
+	 */
+	while (offset < len) {
+		dcbcfg->app[i].priority = ((buf[offset] &
+					    ICE_IEEE_APP_PRIO_M) >>
+					   ICE_IEEE_APP_PRIO_S);
+		dcbcfg->app[i].selector = ((buf[offset] &
+					    ICE_IEEE_APP_SEL_M) >>
+					   ICE_IEEE_APP_SEL_S);
+		dcbcfg->app[i].prot_id = (buf[offset + 1] << 0x8) |
+			buf[offset + 2];
+		/* Move to next app */
+		offset += 3;
+		i++;
+		if (i >= ICE_DCBX_MAX_APPS)
+			break;
+	}
+
+	dcbcfg->numapps = i;
+}
+
+/**
+ * ice_parse_ieee_tlv
+ * @tlv: IEEE 802.1Qaz TLV
+ * @dcbcfg: Local store to update ETS REC data
+ *
+ * Get the TLV subtype and send it to parsing function
+ * based on the subtype value
+ */
+static void
+ice_parse_ieee_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	u32 ouisubtype;
+	u8 subtype;
+
+	ouisubtype = ntohl(tlv->ouisubtype);
+	subtype = (u8)((ouisubtype & ICE_LLDP_TLV_SUBTYPE_M) >>
+		       ICE_LLDP_TLV_SUBTYPE_S);
+	switch (subtype) {
+	case ICE_IEEE_SUBTYPE_ETS_CFG:
+		ice_parse_ieee_etscfg_tlv(tlv, dcbcfg);
+		break;
+	case ICE_IEEE_SUBTYPE_ETS_REC:
+		ice_parse_ieee_etsrec_tlv(tlv, dcbcfg);
+		break;
+	case ICE_IEEE_SUBTYPE_PFC_CFG:
+		ice_parse_ieee_pfccfg_tlv(tlv, dcbcfg);
+		break;
+	case ICE_IEEE_SUBTYPE_APP_PRI:
+		ice_parse_ieee_app_tlv(tlv, dcbcfg);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * ice_parse_cee_pgcfg_tlv
+ * @tlv: CEE DCBX PG CFG TLV
+ * @dcbcfg: Local store to update ETS CFG data
+ *
+ * Parses CEE DCBX PG CFG TLV
+ */
+static void
+ice_parse_cee_pgcfg_tlv(struct ice_cee_feat_tlv *tlv,
+			struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *etscfg;
+	u8 *buf = tlv->tlvinfo;
+	u16 offset = 0;
+	int i;
+
+	etscfg = &dcbcfg->etscfg;
+
+	if (tlv->en_will_err & ICE_CEE_FEAT_TLV_WILLING_M)
+		etscfg->willing = 1;
+
+	etscfg->cbs = 0;
+	/* Priority Group Table (4 octets)
+	 * Octets:|    1    |    2    |    3    |    4    |
+	 *        -----------------------------------------
+	 *        |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+	 *        -----------------------------------------
+	 *   Bits:|7  4|3  0|7  4|3  0|7  4|3  0|7  4|3  0|
+	 *        -----------------------------------------
+	 */
+	for (i = 0; i < 4; i++) {
+		etscfg->prio_table[i * 2] =
+			((buf[offset] & ICE_CEE_PGID_PRIO_1_M) >>
+			 ICE_CEE_PGID_PRIO_1_S);
+		etscfg->prio_table[i * 2 + 1] =
+			((buf[offset] & ICE_CEE_PGID_PRIO_0_M) >>
+			 ICE_CEE_PGID_PRIO_0_S);
+		offset++;
+	}
+
+	/* PG Percentage Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |pg0|pg1|pg2|pg3|pg4|pg5|pg6|pg7|
+	 *        ---------------------------------
+	 */
+	ice_for_each_traffic_class(i) {
+		etscfg->tcbwtable[i] = buf[offset++];
+
+		if (etscfg->prio_table[i] == ICE_CEE_PGID_STRICT)
+			dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_STRICT;
+		else
+			dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_ETS;
+	}
+
+	/* Number of TCs supported (1 octet) */
+	etscfg->maxtcs = buf[offset];
+}
+
+/**
+ * ice_parse_cee_pfccfg_tlv
+ * @tlv: CEE DCBX PFC CFG TLV
+ * @dcbcfg: Local store to update PFC CFG data
+ *
+ * Parses CEE DCBX PFC CFG TLV
+ */
+static void
+ice_parse_cee_pfccfg_tlv(struct ice_cee_feat_tlv *tlv,
+			 struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+
+	if (tlv->en_will_err & ICE_CEE_FEAT_TLV_WILLING_M)
+		dcbcfg->pfc.willing = 1;
+
+	/* ------------------------
+	 * | PFC Enable | PFC TCs |
+	 * ------------------------
+	 * | 1 octet    | 1 octet |
+	 */
+	dcbcfg->pfc.pfcena = buf[0];
+	dcbcfg->pfc.pfccap = buf[1];
+}
+
+/**
+ * ice_parse_cee_app_tlv
+ * @tlv: CEE DCBX APP TLV
+ * @dcbcfg: Local store to update APP PRIO data
+ *
+ * Parses CEE DCBX APP PRIO TLV
+ */
+static void
+ice_parse_cee_app_tlv(struct ice_cee_feat_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	u16 len, typelen, offset = 0;
+	struct ice_cee_app_prio *app;
+	u8 i;
+
+	typelen = ntohs(tlv->hdr.typelen);
+	len = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+
+	dcbcfg->numapps = len / sizeof(*app);
+	if (!dcbcfg->numapps)
+		return;
+	if (dcbcfg->numapps > ICE_DCBX_MAX_APPS)
+		dcbcfg->numapps = ICE_DCBX_MAX_APPS;
+
+	for (i = 0; i < dcbcfg->numapps; i++) {
+		u8 up, selector;
+
+		app = (struct ice_cee_app_prio *)(tlv->tlvinfo + offset);
+		for (up = 0; up < ICE_MAX_USER_PRIORITY; up++)
+			if (app->prio_map & BIT(up))
+				break;
+
+		dcbcfg->app[i].priority = up;
+
+		/* Get Selector from lower 2 bits, and convert to IEEE */
+		selector = (app->upper_oui_sel & ICE_CEE_APP_SELECTOR_M);
+		switch (selector) {
+		case ICE_CEE_APP_SEL_ETHTYPE:
+			dcbcfg->app[i].selector = ICE_APP_SEL_ETHTYPE;
+			break;
+		case ICE_CEE_APP_SEL_TCPIP:
+			dcbcfg->app[i].selector = ICE_APP_SEL_TCPIP;
+			break;
+		default:
+			/* Keep selector as it is for unknown types */
+			dcbcfg->app[i].selector = selector;
+		}
+
+		dcbcfg->app[i].prot_id = ntohs(app->protocol);
+		/* Move to next app */
+		offset += sizeof(*app);
+	}
+}
+
+/**
+ * ice_parse_cee_tlv
+ * @tlv: CEE DCBX TLV
+ * @dcbcfg: Local store to update DCBX config data
+ *
+ * Get the TLV subtype and send it to parsing function
+ * based on the subtype value
+ */
+static void
+ice_parse_cee_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_cee_feat_tlv *sub_tlv;
+	u8 subtype, feat_tlv_count = 0;
+	u16 len, tlvlen, typelen;
+	u32 ouisubtype;
+
+	ouisubtype = ntohl(tlv->ouisubtype);
+	subtype = (u8)((ouisubtype & ICE_LLDP_TLV_SUBTYPE_M) >>
+		       ICE_LLDP_TLV_SUBTYPE_S);
+	/* Return if not CEE DCBX */
+	if (subtype != ICE_CEE_DCBX_TYPE)
+		return;
+
+	typelen = ntohs(tlv->typelen);
+	tlvlen = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+	len = sizeof(tlv->typelen) + sizeof(ouisubtype) +
+		sizeof(struct ice_cee_ctrl_tlv);
+	/* Return if no CEE DCBX Feature TLVs */
+	if (tlvlen <= len)
+		return;
+
+	sub_tlv = (struct ice_cee_feat_tlv *)((char *)tlv + len);
+	while (feat_tlv_count < ICE_CEE_MAX_FEAT_TYPE) {
+		u16 sublen;
+
+		typelen = ntohs(sub_tlv->hdr.typelen);
+		sublen = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+		subtype = (u8)((typelen & ICE_LLDP_TLV_TYPE_M) >>
+			       ICE_LLDP_TLV_TYPE_S);
+		switch (subtype) {
+		case ICE_CEE_SUBTYPE_PG_CFG:
+			ice_parse_cee_pgcfg_tlv(sub_tlv, dcbcfg);
+			break;
+		case ICE_CEE_SUBTYPE_PFC_CFG:
+			ice_parse_cee_pfccfg_tlv(sub_tlv, dcbcfg);
+			break;
+		case ICE_CEE_SUBTYPE_APP_PRI:
+			ice_parse_cee_app_tlv(sub_tlv, dcbcfg);
+			break;
+		default:
+			return;	/* Invalid Sub-type return */
+		}
+		feat_tlv_count++;
+		/* Move to next sub TLV */
+		sub_tlv = (struct ice_cee_feat_tlv *)
+			  ((char *)sub_tlv + sizeof(sub_tlv->hdr.typelen) +
+			   sublen);
+	}
+}
+
+/**
+ * ice_parse_org_tlv
+ * @tlv: Organization specific TLV
+ * @dcbcfg: Local store to update ETS REC data
+ *
+ * Currently only IEEE 802.1Qaz TLV is supported, all others
+ * will be returned
+ */
+static void
+ice_parse_org_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	u32 ouisubtype;
+	u32 oui;
+
+	ouisubtype = ntohl(tlv->ouisubtype);
+	oui = ((ouisubtype & ICE_LLDP_TLV_OUI_M) >> ICE_LLDP_TLV_OUI_S);
+	switch (oui) {
+	case ICE_IEEE_8021QAZ_OUI:
+		ice_parse_ieee_tlv(tlv, dcbcfg);
+		break;
+	case ICE_CEE_DCBX_OUI:
+		ice_parse_cee_tlv(tlv, dcbcfg);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * ice_lldp_to_dcb_cfg
+ * @lldpmib: LLDPDU to be parsed
+ * @dcbcfg: store for LLDPDU data
+ *
+ * Parse DCB configuration from the LLDPDU
+ */
+static enum ice_status
+ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_lldp_org_tlv *tlv;
+	enum ice_status ret = 0;
+	u16 offset = 0;
+	u16 typelen;
+	u16 type;
+	u16 len;
+
+	if (!lldpmib || !dcbcfg)
+		return ICE_ERR_PARAM;
+
+	/* set to the start of LLDPDU */
+	lldpmib += ETH_HLEN;
+	tlv = (struct ice_lldp_org_tlv *)lldpmib;
+	while (1) {
+		typelen = ntohs(tlv->typelen);
+		type = ((typelen & ICE_LLDP_TLV_TYPE_M) >> ICE_LLDP_TLV_TYPE_S);
+		len = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+		offset += sizeof(typelen) + len;
+
+		/* END TLV or beyond LLDPDU size */
+		if (type == ICE_TLV_TYPE_END || offset > ICE_LLDPDU_SIZE)
+			break;
+
+		switch (type) {
+		case ICE_TLV_TYPE_ORG:
+			ice_parse_org_tlv(tlv, dcbcfg);
+			break;
+		default:
+			break;
+		}
+
+		/* Move to next TLV */
+		tlv = (struct ice_lldp_org_tlv *)
+		      ((char *)tlv + sizeof(tlv->typelen) + len);
+	}
+
+	return ret;
+}
+
+/**
+ * ice_aq_get_dcb_cfg
+ * @hw: pointer to the HW struct
+ * @mib_type: MIB type for the query
+ * @bridgetype: bridge type for the query (remote)
+ * @dcbcfg: store for LLDPDU data
+ *
+ * Query DCB configuration from the firmware
+ */
+enum ice_status
+ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype,
+		   struct ice_dcbx_cfg *dcbcfg)
+{
+	enum ice_status ret;
+	u8 *lldpmib;
+
+	/* Allocate the LLDPDU */
+	lldpmib = devm_kzalloc(ice_hw_to_dev(hw), ICE_LLDPDU_SIZE, GFP_KERNEL);
+	if (!lldpmib)
+		return ICE_ERR_NO_MEMORY;
+
+	ret = ice_aq_get_lldp_mib(hw, bridgetype, mib_type, (void *)lldpmib,
+				  ICE_LLDPDU_SIZE, NULL, NULL, NULL);
+
+	if (!ret)
+		/* Parse LLDP MIB to get DCB configuration */
+		ret = ice_lldp_to_dcb_cfg(lldpmib, dcbcfg);
+
+	devm_kfree(ice_hw_to_dev(hw), lldpmib);
+
+	return ret;
+}
+
+/**
+ * ice_aq_start_stop_dcbx - Start/Stop DCBX service in FW
+ * @hw: pointer to the HW struct
+ * @start_dcbx_agent: True if DCBX Agent needs to be started
+ *		      False if DCBX Agent needs to be stopped
+ * @dcbx_agent_status: FW indicates back the DCBX agent status
+ *		       True if DCBX Agent is active
+ *		       False if DCBX Agent is stopped
+ * @cd: pointer to command details structure or NULL
+ *
+ * Start/Stop the embedded dcbx Agent. In case that this wrapper function
+ * returns ICE_SUCCESS, caller will need to check if FW returns back the same
+ * value as stated in dcbx_agent_status, and react accordingly. (0x0A09)
+ */
+enum ice_status
+ice_aq_start_stop_dcbx(struct ice_hw *hw, bool start_dcbx_agent,
+		       bool *dcbx_agent_status, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_stop_start_specific_agent *cmd;
+	enum ice_status status;
+	struct ice_aq_desc desc;
+	u16 opcode;
+
+	cmd = &desc.params.lldp_agent_ctrl;
+
+	opcode = ice_aqc_opc_lldp_stop_start_specific_agent;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, opcode);
+
+	if (start_dcbx_agent)
+		cmd->command = ICE_AQC_START_STOP_AGENT_START_DCBX;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+
+	*dcbx_agent_status = false;
+
+	if (!status &&
+	    cmd->command == ICE_AQC_START_STOP_AGENT_START_DCBX)
+		*dcbx_agent_status = true;
+
+	return status;
+}
+
+/**
+ * ice_aq_get_cee_dcb_cfg
+ * @hw: pointer to the HW struct
+ * @buff: response buffer that stores CEE operational configuration
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get CEE DCBX mode operational configuration from firmware (0x0A07)
+ */
+static enum ice_status
+ice_aq_get_cee_dcb_cfg(struct ice_hw *hw,
+		       struct ice_aqc_get_cee_dcb_cfg_resp *buff,
+		       struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cee_dcb_cfg);
+
+	return ice_aq_send_cmd(hw, &desc, (void *)buff, sizeof(*buff), cd);
+}
+
+/**
+ * ice_cee_to_dcb_cfg
+ * @cee_cfg: pointer to CEE configuration struct
+ * @pi: port information structure
+ *
+ * Convert CEE configuration from firmware to DCB configuration
+ */
+static void
+ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg,
+		   struct ice_port_info *pi)
+{
+	u32 status, tlv_status = le32_to_cpu(cee_cfg->tlv_status);
+	u32 ice_aqc_cee_status_mask, ice_aqc_cee_status_shift, j;
+	u8 i, err, sync, oper, app_index, ice_app_sel_type;
+	u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio);
+	u16 ice_aqc_cee_app_mask, ice_aqc_cee_app_shift;
+	struct ice_dcbx_cfg *cmp_dcbcfg, *dcbcfg;
+	u16 ice_app_prot_id_type;
+
+	dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
+	dcbcfg->dcbx_mode = ICE_DCBX_MODE_CEE;
+	dcbcfg->tlv_status = tlv_status;
+
+	/* CEE PG data */
+	dcbcfg->etscfg.maxtcs = cee_cfg->oper_num_tc;
+
+	/* Note that the FW creates the oper_prio_tc nibbles reversed
+	 * from those in the CEE Priority Group sub-TLV.
+	 */
+	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS / 2; i++) {
+		dcbcfg->etscfg.prio_table[i * 2] =
+			((cee_cfg->oper_prio_tc[i] & ICE_CEE_PGID_PRIO_0_M) >>
+			 ICE_CEE_PGID_PRIO_0_S);
+		dcbcfg->etscfg.prio_table[i * 2 + 1] =
+			((cee_cfg->oper_prio_tc[i] & ICE_CEE_PGID_PRIO_1_M) >>
+			 ICE_CEE_PGID_PRIO_1_S);
+	}
+
+	ice_for_each_traffic_class(i) {
+		dcbcfg->etscfg.tcbwtable[i] = cee_cfg->oper_tc_bw[i];
+
+		if (dcbcfg->etscfg.prio_table[i] == ICE_CEE_PGID_STRICT) {
+			/* Map it to next empty TC */
+			dcbcfg->etscfg.prio_table[i] = cee_cfg->oper_num_tc - 1;
+			dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_STRICT;
+		} else {
+			dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_ETS;
+		}
+	}
+
+	/* CEE PFC data */
+	dcbcfg->pfc.pfcena = cee_cfg->oper_pfc_en;
+	dcbcfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS;
+
+	/* CEE APP TLV data */
+	if (dcbcfg->app_mode == ICE_DCBX_APPS_NON_WILLING)
+		cmp_dcbcfg = &pi->qos_cfg.desired_dcbx_cfg;
+	else
+		cmp_dcbcfg = &pi->qos_cfg.remote_dcbx_cfg;
+
+	app_index = 0;
+	for (i = 0; i < 3; i++) {
+		if (i == 0) {
+			/* FCoE APP */
+			ice_aqc_cee_status_mask = ICE_AQC_CEE_FCOE_STATUS_M;
+			ice_aqc_cee_status_shift = ICE_AQC_CEE_FCOE_STATUS_S;
+			ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FCOE_M;
+			ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FCOE_S;
+			ice_app_sel_type = ICE_APP_SEL_ETHTYPE;
+			ice_app_prot_id_type = ICE_APP_PROT_ID_FCOE;
+		} else if (i == 1) {
+			/* iSCSI APP */
+			ice_aqc_cee_status_mask = ICE_AQC_CEE_ISCSI_STATUS_M;
+			ice_aqc_cee_status_shift = ICE_AQC_CEE_ISCSI_STATUS_S;
+			ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_ISCSI_M;
+			ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_ISCSI_S;
+			ice_app_sel_type = ICE_APP_SEL_TCPIP;
+			ice_app_prot_id_type = ICE_APP_PROT_ID_ISCSI;
+
+			for (j = 0; j < cmp_dcbcfg->numapps; j++) {
+				u16 prot_id = cmp_dcbcfg->app[j].prot_id;
+				u8 sel = cmp_dcbcfg->app[j].selector;
+
+				if  (sel == ICE_APP_SEL_TCPIP &&
+				     (prot_id == ICE_APP_PROT_ID_ISCSI ||
+				      prot_id == ICE_APP_PROT_ID_ISCSI_860)) {
+					ice_app_prot_id_type = prot_id;
+					break;
+				}
+			}
+		} else {
+			/* FIP APP */
+			ice_aqc_cee_status_mask = ICE_AQC_CEE_FIP_STATUS_M;
+			ice_aqc_cee_status_shift = ICE_AQC_CEE_FIP_STATUS_S;
+			ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FIP_M;
+			ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FIP_S;
+			ice_app_sel_type = ICE_APP_SEL_ETHTYPE;
+			ice_app_prot_id_type = ICE_APP_PROT_ID_FIP;
+		}
+
+		status = (tlv_status & ice_aqc_cee_status_mask) >>
+			 ice_aqc_cee_status_shift;
+		err = (status & ICE_TLV_STATUS_ERR) ? 1 : 0;
+		sync = (status & ICE_TLV_STATUS_SYNC) ? 1 : 0;
+		oper = (status & ICE_TLV_STATUS_OPER) ? 1 : 0;
+		/* Add FCoE/iSCSI/FIP APP if Error is False and
+		 * Oper/Sync is True
+		 */
+		if (!err && sync && oper) {
+			dcbcfg->app[app_index].priority =
+				(app_prio & ice_aqc_cee_app_mask) >>
+				ice_aqc_cee_app_shift;
+			dcbcfg->app[app_index].selector = ice_app_sel_type;
+			dcbcfg->app[app_index].prot_id = ice_app_prot_id_type;
+			app_index++;
+		}
+	}
+
+	dcbcfg->numapps = app_index;
+}
+
+/**
+ * ice_get_ieee_dcb_cfg
+ * @pi: port information structure
+ * @dcbx_mode: mode of DCBX (IEEE or CEE)
+ *
+ * Get IEEE or CEE mode DCB configuration from the Firmware
+ */
+static enum ice_status
+ice_get_ieee_or_cee_dcb_cfg(struct ice_port_info *pi, u8 dcbx_mode)
+{
+	struct ice_dcbx_cfg *dcbx_cfg = NULL;
+	enum ice_status ret;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	if (dcbx_mode == ICE_DCBX_MODE_IEEE)
+		dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
+	else if (dcbx_mode == ICE_DCBX_MODE_CEE)
+		dcbx_cfg = &pi->qos_cfg.desired_dcbx_cfg;
+
+	/* Get Local DCB Config in case of ICE_DCBX_MODE_IEEE
+	 * or get CEE DCB Desired Config in case of ICE_DCBX_MODE_CEE
+	 */
+	ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_LOCAL,
+				 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbx_cfg);
+	if (ret)
+		goto out;
+
+	/* Get Remote DCB Config */
+	dcbx_cfg = &pi->qos_cfg.remote_dcbx_cfg;
+	ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
+				 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbx_cfg);
+	/* Don't treat ENOENT as an error for Remote MIBs */
+	if (pi->hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)
+		ret = 0;
+
+out:
+	return ret;
+}
+
+/**
+ * ice_get_dcb_cfg
+ * @pi: port information structure
+ *
+ * Get DCB configuration from the Firmware
+ */
+enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi)
+{
+	struct ice_aqc_get_cee_dcb_cfg_resp cee_cfg;
+	struct ice_dcbx_cfg *dcbx_cfg;
+	enum ice_status ret;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	ret = ice_aq_get_cee_dcb_cfg(pi->hw, &cee_cfg, NULL);
+	if (!ret) {
+		/* CEE mode */
+		ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_CEE);
+		ice_cee_to_dcb_cfg(&cee_cfg, pi);
+	} else if (pi->hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) {
+		/* CEE mode not enabled try querying IEEE data */
+		dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
+		dcbx_cfg->dcbx_mode = ICE_DCBX_MODE_IEEE;
+		ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_IEEE);
+	}
+
+	return ret;
+}
+
+/**
+ * ice_init_dcb
+ * @hw: pointer to the HW struct
+ * @enable_mib_change: enable MIB change event
+ *
+ * Update DCB configuration from the Firmware
+ */
+enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change)
+{
+	struct ice_qos_cfg *qos_cfg = &hw->port_info->qos_cfg;
+	enum ice_status ret = 0;
+
+	if (!hw->func_caps.common_cap.dcb)
+		return ICE_ERR_NOT_SUPPORTED;
+
+	qos_cfg->is_sw_lldp = true;
+
+	/* Get DCBX status */
+	qos_cfg->dcbx_status = ice_get_dcbx_status(hw);
+
+	if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DONE ||
+	    qos_cfg->dcbx_status == ICE_DCBX_STATUS_IN_PROGRESS ||
+	    qos_cfg->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) {
+		/* Get current DCBX configuration */
+		ret = ice_get_dcb_cfg(hw->port_info);
+		if (ret)
+			return ret;
+		qos_cfg->is_sw_lldp = false;
+	} else if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DIS) {
+		return ICE_ERR_NOT_READY;
+	}
+
+	/* Configure the LLDP MIB change event */
+	if (enable_mib_change) {
+		ret = ice_aq_cfg_lldp_mib_change(hw, true, NULL);
+		if (ret)
+			qos_cfg->is_sw_lldp = true;
+	}
+
+	return ret;
+}
+
+/**
+ * ice_cfg_lldp_mib_change
+ * @hw: pointer to the HW struct
+ * @ena_mib: enable/disable MIB change event
+ *
+ * Configure (disable/enable) MIB
+ */
+enum ice_status ice_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_mib)
+{
+	struct ice_qos_cfg *qos_cfg = &hw->port_info->qos_cfg;
+	enum ice_status ret;
+
+	if (!hw->func_caps.common_cap.dcb)
+		return ICE_ERR_NOT_SUPPORTED;
+
+	/* Get DCBX status */
+	qos_cfg->dcbx_status = ice_get_dcbx_status(hw);
+
+	if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DIS)
+		return ICE_ERR_NOT_READY;
+
+	ret = ice_aq_cfg_lldp_mib_change(hw, ena_mib, NULL);
+	if (!ret)
+		qos_cfg->is_sw_lldp = !ena_mib;
+
+	return ret;
+}
+
+/**
+ * ice_add_ieee_ets_common_tlv
+ * @buf: Data buffer to be populated with ice_dcb_ets_cfg data
+ * @ets_cfg: Container for ice_dcb_ets_cfg data
+ *
+ * Populate the TLV buffer with ice_dcb_ets_cfg data
+ */
+static void
+ice_add_ieee_ets_common_tlv(u8 *buf, struct ice_dcb_ets_cfg *ets_cfg)
+{
+	u8 priority0, priority1;
+	u8 offset = 0;
+	int i;
+
+	/* Priority Assignment Table (4 octets)
+	 * Octets:|    1    |    2    |    3    |    4    |
+	 *        -----------------------------------------
+	 *        |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+	 *        -----------------------------------------
+	 *   Bits:|7  4|3  0|7  4|3  0|7  4|3  0|7  4|3  0|
+	 *        -----------------------------------------
+	 */
+	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS / 2; i++) {
+		priority0 = ets_cfg->prio_table[i * 2] & 0xF;
+		priority1 = ets_cfg->prio_table[i * 2 + 1] & 0xF;
+		buf[offset] = (priority0 << ICE_IEEE_ETS_PRIO_1_S) | priority1;
+		offset++;
+	}
+
+	/* TC Bandwidth Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 *
+	 * TSA Assignment Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 */
+	ice_for_each_traffic_class(i) {
+		buf[offset] = ets_cfg->tcbwtable[i];
+		buf[ICE_MAX_TRAFFIC_CLASS + offset] = ets_cfg->tsatable[i];
+		offset++;
+	}
+}
+
+/**
+ * ice_add_ieee_ets_tlv - Prepare ETS TLV in IEEE format
+ * @tlv: Fill the ETS config data in IEEE format
+ * @dcbcfg: Local store which holds the DCB Config
+ *
+ * Prepare IEEE 802.1Qaz ETS CFG TLV
+ */
+static void
+ice_add_ieee_ets_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *etscfg;
+	u8 *buf = tlv->tlvinfo;
+	u8 maxtcwilling = 0;
+	u32 ouisubtype;
+	u16 typelen;
+
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_IEEE_ETS_TLV_LEN);
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_ETS_CFG);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* First Octet post subtype
+	 * --------------------------
+	 * |will-|CBS  | Re-  | Max |
+	 * |ing  |     |served| TCs |
+	 * --------------------------
+	 * |1bit | 1bit|3 bits|3bits|
+	 */
+	etscfg = &dcbcfg->etscfg;
+	if (etscfg->willing)
+		maxtcwilling = BIT(ICE_IEEE_ETS_WILLING_S);
+	maxtcwilling |= etscfg->maxtcs & ICE_IEEE_ETS_MAXTC_M;
+	buf[0] = maxtcwilling;
+
+	/* Begin adding at Priority Assignment Table (offset 1 in buf) */
+	ice_add_ieee_ets_common_tlv(&buf[1], etscfg);
+}
+
+/**
+ * ice_add_ieee_etsrec_tlv - Prepare ETS Recommended TLV in IEEE format
+ * @tlv: Fill ETS Recommended TLV in IEEE format
+ * @dcbcfg: Local store which holds the DCB Config
+ *
+ * Prepare IEEE 802.1Qaz ETS REC TLV
+ */
+static void
+ice_add_ieee_etsrec_tlv(struct ice_lldp_org_tlv *tlv,
+			struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *etsrec;
+	u8 *buf = tlv->tlvinfo;
+	u32 ouisubtype;
+	u16 typelen;
+
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_IEEE_ETS_TLV_LEN);
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_ETS_REC);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	etsrec = &dcbcfg->etsrec;
+
+	/* First Octet is reserved */
+	/* Begin adding at Priority Assignment Table (offset 1 in buf) */
+	ice_add_ieee_ets_common_tlv(&buf[1], etsrec);
+}
+
+/**
+ * ice_add_ieee_pfc_tlv - Prepare PFC TLV in IEEE format
+ * @tlv: Fill PFC TLV in IEEE format
+ * @dcbcfg: Local store which holds the PFC CFG data
+ *
+ * Prepare IEEE 802.1Qaz PFC CFG TLV
+ */
+static void
+ice_add_ieee_pfc_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+	u32 ouisubtype;
+	u16 typelen;
+
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_IEEE_PFC_TLV_LEN);
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_PFC_CFG);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* ----------------------------------------
+	 * |will-|MBC  | Re-  | PFC |  PFC Enable  |
+	 * |ing  |     |served| cap |              |
+	 * -----------------------------------------
+	 * |1bit | 1bit|2 bits|4bits| 1 octet      |
+	 */
+	if (dcbcfg->pfc.willing)
+		buf[0] = BIT(ICE_IEEE_PFC_WILLING_S);
+
+	if (dcbcfg->pfc.mbc)
+		buf[0] |= BIT(ICE_IEEE_PFC_MBC_S);
+
+	buf[0] |= dcbcfg->pfc.pfccap & 0xF;
+	buf[1] = dcbcfg->pfc.pfcena;
+}
+
+/**
+ * ice_add_ieee_app_pri_tlv -  Prepare APP TLV in IEEE format
+ * @tlv: Fill APP TLV in IEEE format
+ * @dcbcfg: Local store which holds the APP CFG data
+ *
+ * Prepare IEEE 802.1Qaz APP CFG TLV
+ */
+static void
+ice_add_ieee_app_pri_tlv(struct ice_lldp_org_tlv *tlv,
+			 struct ice_dcbx_cfg *dcbcfg)
+{
+	u16 typelen, len, offset = 0;
+	u8 priority, selector, i = 0;
+	u8 *buf = tlv->tlvinfo;
+	u32 ouisubtype;
+
+	/* No APP TLVs then just return */
+	if (dcbcfg->numapps == 0)
+		return;
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_APP_PRI);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* Move offset to App Priority Table */
+	offset++;
+	/* Application Priority Table (3 octets)
+	 * Octets:|         1          |    2    |    3    |
+	 *        -----------------------------------------
+	 *        |Priority|Rsrvd| Sel |    Protocol ID    |
+	 *        -----------------------------------------
+	 *   Bits:|23    21|20 19|18 16|15                0|
+	 *        -----------------------------------------
+	 */
+	while (i < dcbcfg->numapps) {
+		priority = dcbcfg->app[i].priority & 0x7;
+		selector = dcbcfg->app[i].selector & 0x7;
+		buf[offset] = (priority << ICE_IEEE_APP_PRIO_S) | selector;
+		buf[offset + 1] = (dcbcfg->app[i].prot_id >> 0x8) & 0xFF;
+		buf[offset + 2] = dcbcfg->app[i].prot_id & 0xFF;
+		/* Move to next app */
+		offset += 3;
+		i++;
+		if (i >= ICE_DCBX_MAX_APPS)
+			break;
+	}
+	/* len includes size of ouisubtype + 1 reserved + 3*numapps */
+	len = sizeof(tlv->ouisubtype) + 1 + (i * 3);
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) | (len & 0x1FF));
+	tlv->typelen = htons(typelen);
+}
+
+/**
+ * ice_add_dcb_tlv - Add all IEEE TLVs
+ * @tlv: Fill TLV data in IEEE format
+ * @dcbcfg: Local store which holds the DCB Config
+ * @tlvid: Type of IEEE TLV
+ *
+ * Add tlv information
+ */
+static void
+ice_add_dcb_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg,
+		u16 tlvid)
+{
+	switch (tlvid) {
+	case ICE_IEEE_TLV_ID_ETS_CFG:
+		ice_add_ieee_ets_tlv(tlv, dcbcfg);
+		break;
+	case ICE_IEEE_TLV_ID_ETS_REC:
+		ice_add_ieee_etsrec_tlv(tlv, dcbcfg);
+		break;
+	case ICE_IEEE_TLV_ID_PFC_CFG:
+		ice_add_ieee_pfc_tlv(tlv, dcbcfg);
+		break;
+	case ICE_IEEE_TLV_ID_APP_PRI:
+		ice_add_ieee_app_pri_tlv(tlv, dcbcfg);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * ice_dcb_cfg_to_lldp - Convert DCB configuration to MIB format
+ * @lldpmib: pointer to the HW struct
+ * @miblen: length of LLDP MIB
+ * @dcbcfg: Local store which holds the DCB Config
+ *
+ * Convert the DCB configuration to MIB format
+ */
+static void
+ice_dcb_cfg_to_lldp(u8 *lldpmib, u16 *miblen, struct ice_dcbx_cfg *dcbcfg)
+{
+	u16 len, offset = 0, tlvid = ICE_TLV_ID_START;
+	struct ice_lldp_org_tlv *tlv;
+	u16 typelen;
+
+	tlv = (struct ice_lldp_org_tlv *)lldpmib;
+	while (1) {
+		ice_add_dcb_tlv(tlv, dcbcfg, tlvid++);
+		typelen = ntohs(tlv->typelen);
+		len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
+		if (len)
+			offset += len + 2;
+		/* END TLV or beyond LLDPDU size */
+		if (tlvid >= ICE_TLV_ID_END_OF_LLDPPDU ||
+		    offset > ICE_LLDPDU_SIZE)
+			break;
+		/* Move to next TLV */
+		if (len)
+			tlv = (struct ice_lldp_org_tlv *)
+				((char *)tlv + sizeof(tlv->typelen) + len);
+	}
+	*miblen = offset;
+}
+
+/**
+ * ice_set_dcb_cfg - Set the local LLDP MIB to FW
+ * @pi: port information structure
+ *
+ * Set DCB configuration to the Firmware
+ */
+enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi)
+{
+	u8 mib_type, *lldpmib = NULL;
+	struct ice_dcbx_cfg *dcbcfg;
+	enum ice_status ret;
+	struct ice_hw *hw;
+	u16 miblen;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	hw = pi->hw;
+
+	/* update the HW local config */
+	dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
+	/* Allocate the LLDPDU */
+	lldpmib = devm_kzalloc(ice_hw_to_dev(hw), ICE_LLDPDU_SIZE, GFP_KERNEL);
+	if (!lldpmib)
+		return ICE_ERR_NO_MEMORY;
+
+	mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
+	if (dcbcfg->app_mode == ICE_DCBX_APPS_NON_WILLING)
+		mib_type |= SET_LOCAL_MIB_TYPE_CEE_NON_WILLING;
+
+	ice_dcb_cfg_to_lldp(lldpmib, &miblen, dcbcfg);
+	ret = ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, miblen,
+				  NULL);
+
+	devm_kfree(ice_hw_to_dev(hw), lldpmib);
+
+	return ret;
+}
+
+/**
+ * ice_aq_query_port_ets - query port ETS configuration
+ * @pi: port information structure
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @cd: pointer to command details structure or NULL
+ *
+ * query current port ETS configuration
+ */
+static enum ice_status
+ice_aq_query_port_ets(struct ice_port_info *pi,
+		      struct ice_aqc_port_ets_elem *buf, u16 buf_size,
+		      struct ice_sq_cd *cd)
+{
+	struct ice_aqc_query_port_ets *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+	cmd = &desc.params.port_ets;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_port_ets);
+	cmd->port_teid = pi->root->info.node_teid;
+
+	status = ice_aq_send_cmd(pi->hw, &desc, buf, buf_size, cd);
+	return status;
+}
+
+/**
+ * ice_update_port_tc_tree_cfg - update TC tree configuration
+ * @pi: port information structure
+ * @buf: pointer to buffer
+ *
+ * update the SW DB with the new TC changes
+ */
+static enum ice_status
+ice_update_port_tc_tree_cfg(struct ice_port_info *pi,
+			    struct ice_aqc_port_ets_elem *buf)
+{
+	struct ice_sched_node *node, *tc_node;
+	struct ice_aqc_txsched_elem_data elem;
+	enum ice_status status = 0;
+	u32 teid1, teid2;
+	u8 i, j;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+	/* suspend the missing TC nodes */
+	for (i = 0; i < pi->root->num_children; i++) {
+		teid1 = le32_to_cpu(pi->root->children[i]->info.node_teid);
+		ice_for_each_traffic_class(j) {
+			teid2 = le32_to_cpu(buf->tc_node_teid[j]);
+			if (teid1 == teid2)
+				break;
+		}
+		if (j < ICE_MAX_TRAFFIC_CLASS)
+			continue;
+		/* TC is missing */
+		pi->root->children[i]->in_use = false;
+	}
+	/* add the new TC nodes */
+	ice_for_each_traffic_class(j) {
+		teid2 = le32_to_cpu(buf->tc_node_teid[j]);
+		if (teid2 == ICE_INVAL_TEID)
+			continue;
+		/* Is it already present in the tree ? */
+		for (i = 0; i < pi->root->num_children; i++) {
+			tc_node = pi->root->children[i];
+			if (!tc_node)
+				continue;
+			teid1 = le32_to_cpu(tc_node->info.node_teid);
+			if (teid1 == teid2) {
+				tc_node->tc_num = j;
+				tc_node->in_use = true;
+				break;
+			}
+		}
+		if (i < pi->root->num_children)
+			continue;
+		/* new TC */
+		status = ice_sched_query_elem(pi->hw, teid2, &elem);
+		if (!status)
+			status = ice_sched_add_node(pi, 1, &elem);
+		if (status)
+			break;
+		/* update the TC number */
+		node = ice_sched_find_node_by_teid(pi->root, teid2);
+		if (node)
+			node->tc_num = j;
+	}
+	return status;
+}
+
+/**
+ * ice_query_port_ets - query port ETS configuration
+ * @pi: port information structure
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @cd: pointer to command details structure or NULL
+ *
+ * query current port ETS configuration and update the
+ * SW DB with the TC changes
+ */
+enum ice_status
+ice_query_port_ets(struct ice_port_info *pi,
+		   struct ice_aqc_port_ets_elem *buf, u16 buf_size,
+		   struct ice_sq_cd *cd)
+{
+	enum ice_status status;
+
+	mutex_lock(&pi->sched_lock);
+	status = ice_aq_query_port_ets(pi, buf, buf_size, cd);
+	if (!status)
+		status = ice_update_port_tc_tree_cfg(pi, buf);
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.h b/drivers/net/ethernet/intel/ice/ice_dcb.h
new file mode 100644
index 000000000..d7e5e6178
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DCB_H_
+#define _ICE_DCB_H_
+
+#include "ice_type.h"
+
+#define ICE_DCBX_STATUS_NOT_STARTED	0
+#define ICE_DCBX_STATUS_IN_PROGRESS	1
+#define ICE_DCBX_STATUS_DONE		2
+#define ICE_DCBX_STATUS_DIS		7
+
+#define ICE_TLV_TYPE_END		0
+#define ICE_TLV_TYPE_ORG		127
+
+#define ICE_IEEE_8021QAZ_OUI		0x0080C2
+#define ICE_IEEE_SUBTYPE_ETS_CFG	9
+#define ICE_IEEE_SUBTYPE_ETS_REC	10
+#define ICE_IEEE_SUBTYPE_PFC_CFG	11
+#define ICE_IEEE_SUBTYPE_APP_PRI	12
+
+#define ICE_CEE_DCBX_OUI		0x001B21
+#define ICE_CEE_DCBX_TYPE		2
+#define ICE_CEE_SUBTYPE_PG_CFG		2
+#define ICE_CEE_SUBTYPE_PFC_CFG		3
+#define ICE_CEE_SUBTYPE_APP_PRI		4
+#define ICE_CEE_MAX_FEAT_TYPE		3
+/* Defines for LLDP TLV header */
+#define ICE_LLDP_TLV_LEN_S		0
+#define ICE_LLDP_TLV_LEN_M		(0x01FF << ICE_LLDP_TLV_LEN_S)
+#define ICE_LLDP_TLV_TYPE_S		9
+#define ICE_LLDP_TLV_TYPE_M		(0x7F << ICE_LLDP_TLV_TYPE_S)
+#define ICE_LLDP_TLV_SUBTYPE_S		0
+#define ICE_LLDP_TLV_SUBTYPE_M		(0xFF << ICE_LLDP_TLV_SUBTYPE_S)
+#define ICE_LLDP_TLV_OUI_S		8
+#define ICE_LLDP_TLV_OUI_M		(0xFFFFFFUL << ICE_LLDP_TLV_OUI_S)
+
+/* Defines for IEEE ETS TLV */
+#define ICE_IEEE_ETS_MAXTC_S	0
+#define ICE_IEEE_ETS_MAXTC_M		(0x7 << ICE_IEEE_ETS_MAXTC_S)
+#define ICE_IEEE_ETS_CBS_S		6
+#define ICE_IEEE_ETS_CBS_M		BIT(ICE_IEEE_ETS_CBS_S)
+#define ICE_IEEE_ETS_WILLING_S		7
+#define ICE_IEEE_ETS_WILLING_M		BIT(ICE_IEEE_ETS_WILLING_S)
+#define ICE_IEEE_ETS_PRIO_0_S		0
+#define ICE_IEEE_ETS_PRIO_0_M		(0x7 << ICE_IEEE_ETS_PRIO_0_S)
+#define ICE_IEEE_ETS_PRIO_1_S		4
+#define ICE_IEEE_ETS_PRIO_1_M		(0x7 << ICE_IEEE_ETS_PRIO_1_S)
+#define ICE_CEE_PGID_PRIO_0_S		0
+#define ICE_CEE_PGID_PRIO_0_M		(0xF << ICE_CEE_PGID_PRIO_0_S)
+#define ICE_CEE_PGID_PRIO_1_S		4
+#define ICE_CEE_PGID_PRIO_1_M		(0xF << ICE_CEE_PGID_PRIO_1_S)
+#define ICE_CEE_PGID_STRICT		15
+
+/* Defines for IEEE TSA types */
+#define ICE_IEEE_TSA_STRICT		0
+#define ICE_IEEE_TSA_ETS		2
+
+/* Defines for IEEE PFC TLV */
+#define ICE_IEEE_PFC_CAP_S		0
+#define ICE_IEEE_PFC_CAP_M		(0xF << ICE_IEEE_PFC_CAP_S)
+#define ICE_IEEE_PFC_MBC_S		6
+#define ICE_IEEE_PFC_MBC_M		BIT(ICE_IEEE_PFC_MBC_S)
+#define ICE_IEEE_PFC_WILLING_S		7
+#define ICE_IEEE_PFC_WILLING_M		BIT(ICE_IEEE_PFC_WILLING_S)
+
+/* Defines for IEEE APP TLV */
+#define ICE_IEEE_APP_SEL_S		0
+#define ICE_IEEE_APP_SEL_M		(0x7 << ICE_IEEE_APP_SEL_S)
+#define ICE_IEEE_APP_PRIO_S		5
+#define ICE_IEEE_APP_PRIO_M		(0x7 << ICE_IEEE_APP_PRIO_S)
+
+/* TLV definitions for preparing MIB */
+#define ICE_IEEE_TLV_ID_ETS_CFG		3
+#define ICE_IEEE_TLV_ID_ETS_REC		4
+#define ICE_IEEE_TLV_ID_PFC_CFG		5
+#define ICE_IEEE_TLV_ID_APP_PRI		6
+#define ICE_TLV_ID_END_OF_LLDPPDU	7
+#define ICE_TLV_ID_START		ICE_IEEE_TLV_ID_ETS_CFG
+
+#define ICE_IEEE_ETS_TLV_LEN		25
+#define ICE_IEEE_PFC_TLV_LEN		6
+#define ICE_IEEE_APP_TLV_LEN		11
+
+/* IEEE 802.1AB LLDP Organization specific TLV */
+struct ice_lldp_org_tlv {
+	__be16 typelen;
+	__be32 ouisubtype;
+	u8 tlvinfo[];
+} __packed;
+
+struct ice_cee_tlv_hdr {
+	__be16 typelen;
+	u8 operver;
+	u8 maxver;
+};
+
+struct ice_cee_ctrl_tlv {
+	struct ice_cee_tlv_hdr hdr;
+	__be32 seqno;
+	__be32 ackno;
+};
+
+struct ice_cee_feat_tlv {
+	struct ice_cee_tlv_hdr hdr;
+	u8 en_will_err; /* Bits: |En|Will|Err|Reserved(5)| */
+#define ICE_CEE_FEAT_TLV_ENA_M		0x80
+#define ICE_CEE_FEAT_TLV_WILLING_M	0x40
+#define ICE_CEE_FEAT_TLV_ERR_M		0x20
+	u8 subtype;
+	u8 tlvinfo[];
+};
+
+struct ice_cee_app_prio {
+	__be16 protocol;
+	u8 upper_oui_sel; /* Bits: |Upper OUI(6)|Selector(2)| */
+#define ICE_CEE_APP_SELECTOR_M	0x03
+	__be16 lower_oui;
+	u8 prio_map;
+} __packed;
+
+enum ice_status
+ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype,
+		   struct ice_dcbx_cfg *dcbcfg);
+enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi);
+enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi);
+enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change);
+enum ice_status
+ice_query_port_ets(struct ice_port_info *pi,
+		   struct ice_aqc_port_ets_elem *buf, u16 buf_size,
+		   struct ice_sq_cd *cmd_details);
+#ifdef CONFIG_DCB
+enum ice_status
+ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, bool persist,
+		 struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_start_stop_dcbx(struct ice_hw *hw, bool start_dcbx_agent,
+		       bool *dcbx_agent_status, struct ice_sq_cd *cd);
+enum ice_status ice_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_mib);
+#else /* CONFIG_DCB */
+static inline enum ice_status
+ice_aq_stop_lldp(struct ice_hw __always_unused *hw,
+		 bool __always_unused shutdown_lldp_agent,
+		 bool __always_unused persist,
+		 struct ice_sq_cd __always_unused *cd)
+{
+	return 0;
+}
+
+static inline enum ice_status
+ice_aq_start_lldp(struct ice_hw __always_unused *hw,
+		  bool __always_unused persist,
+		  struct ice_sq_cd __always_unused *cd)
+{
+	return 0;
+}
+
+static inline enum ice_status
+ice_aq_start_stop_dcbx(struct ice_hw __always_unused *hw,
+		       bool __always_unused start_dcbx_agent,
+		       bool *dcbx_agent_status,
+		       struct ice_sq_cd __always_unused *cd)
+{
+	*dcbx_agent_status = false;
+
+	return 0;
+}
+
+static inline enum ice_status
+ice_cfg_lldp_mib_change(struct ice_hw __always_unused *hw,
+			bool __always_unused ena_mib)
+{
+	return 0;
+}
+
+#endif /* CONFIG_DCB */
+#endif /* _ICE_DCB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
new file mode 100644
index 000000000..1e8f71ffc
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -0,0 +1,932 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
+
+/**
+ * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration
+ * @vsi: the VSI being configured
+ * @ena_tc: TC map to be enabled
+ */
+void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc)
+{
+	struct net_device *netdev = vsi->netdev;
+	struct ice_pf *pf = vsi->back;
+	struct ice_dcbx_cfg *dcbcfg;
+	u8 netdev_tc;
+	int i;
+
+	if (!netdev)
+		return;
+
+	if (!ena_tc) {
+		netdev_reset_tc(netdev);
+		return;
+	}
+
+	if (netdev_set_num_tc(netdev, vsi->tc_cfg.numtc))
+		return;
+
+	dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+	ice_for_each_traffic_class(i)
+		if (vsi->tc_cfg.ena_tc & BIT(i))
+			netdev_set_tc_queue(netdev,
+					    vsi->tc_cfg.tc_info[i].netdev_tc,
+					    vsi->tc_cfg.tc_info[i].qcount_tx,
+					    vsi->tc_cfg.tc_info[i].qoffset);
+
+	for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
+		u8 ets_tc = dcbcfg->etscfg.prio_table[i];
+
+		/* Get the mapped netdev TC# for the UP */
+		netdev_tc = vsi->tc_cfg.tc_info[ets_tc].netdev_tc;
+		netdev_set_prio_tc_map(netdev, i, netdev_tc);
+	}
+}
+
+/**
+ * ice_dcb_get_ena_tc - return bitmap of enabled TCs
+ * @dcbcfg: DCB config to evaluate for enabled TCs
+ */
+u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 i, num_tc, ena_tc = 1;
+
+	num_tc = ice_dcb_get_num_tc(dcbcfg);
+
+	for (i = 0; i < num_tc; i++)
+		ena_tc |= BIT(i);
+
+	return ena_tc;
+}
+
+/**
+ * ice_is_pfc_causing_hung_q
+ * @pf: pointer to PF structure
+ * @txqueue: Tx queue which is supposedly hung queue
+ *
+ * find if PFC is causing the hung queue, if yes return true else false
+ */
+bool ice_is_pfc_causing_hung_q(struct ice_pf *pf, unsigned int txqueue)
+{
+	u8 num_tcs = 0, i, tc, up_mapped_tc, up_in_tc = 0;
+	u64 ref_prio_xoff[ICE_MAX_UP];
+	struct ice_vsi *vsi;
+	u32 up2tc;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return false;
+
+	ice_for_each_traffic_class(i)
+		if (vsi->tc_cfg.ena_tc & BIT(i))
+			num_tcs++;
+
+	/* first find out the TC to which the hung queue belongs to */
+	for (tc = 0; tc < num_tcs - 1; tc++)
+		if (ice_find_q_in_range(vsi->tc_cfg.tc_info[tc].qoffset,
+					vsi->tc_cfg.tc_info[tc + 1].qoffset,
+					txqueue))
+			break;
+
+	/* Build a bit map of all UPs associated to the suspect hung queue TC,
+	 * so that we check for its counter increment.
+	 */
+	up2tc = rd32(&pf->hw, PRTDCB_TUP2TC);
+	for (i = 0; i < ICE_MAX_UP; i++) {
+		up_mapped_tc = (up2tc >> (i * 3)) & 0x7;
+		if (up_mapped_tc == tc)
+			up_in_tc |= BIT(i);
+	}
+
+	/* Now that we figured out that hung queue is PFC enabled, still the
+	 * Tx timeout can be legitimate. So to make sure Tx timeout is
+	 * absolutely caused by PFC storm, check if the counters are
+	 * incrementing.
+	 */
+	for (i = 0; i < ICE_MAX_UP; i++)
+		if (up_in_tc & BIT(i))
+			ref_prio_xoff[i] = pf->stats.priority_xoff_rx[i];
+
+	ice_update_dcb_stats(pf);
+
+	for (i = 0; i < ICE_MAX_UP; i++)
+		if (up_in_tc & BIT(i))
+			if (pf->stats.priority_xoff_rx[i] > ref_prio_xoff[i])
+				return true;
+
+	return false;
+}
+
+/**
+ * ice_dcb_get_mode - gets the DCB mode
+ * @port_info: pointer to port info structure
+ * @host: if set it's HOST if not it's MANAGED
+ */
+static u8 ice_dcb_get_mode(struct ice_port_info *port_info, bool host)
+{
+	u8 mode;
+
+	if (host)
+		mode = DCB_CAP_DCBX_HOST;
+	else
+		mode = DCB_CAP_DCBX_LLD_MANAGED;
+
+	if (port_info->qos_cfg.local_dcbx_cfg.dcbx_mode & ICE_DCBX_MODE_CEE)
+		return mode | DCB_CAP_DCBX_VER_CEE;
+	else
+		return mode | DCB_CAP_DCBX_VER_IEEE;
+}
+
+/**
+ * ice_dcb_get_num_tc - Get the number of TCs from DCBX config
+ * @dcbcfg: config to retrieve number of TCs from
+ */
+u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
+{
+	bool tc_unused = false;
+	u8 num_tc = 0;
+	u8 ret = 0;
+	int i;
+
+	/* Scan the ETS Config Priority Table to find traffic classes
+	 * enabled and create a bitmask of enabled TCs
+	 */
+	for (i = 0; i < CEE_DCBX_MAX_PRIO; i++)
+		num_tc |= BIT(dcbcfg->etscfg.prio_table[i]);
+
+	/* Scan bitmask for contiguous TCs starting with TC0 */
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		if (num_tc & BIT(i)) {
+			if (!tc_unused) {
+				ret++;
+			} else {
+				pr_err("Non-contiguous TCs - Disabling DCB\n");
+				return 1;
+			}
+		} else {
+			tc_unused = true;
+		}
+	}
+
+	/* There is always at least 1 TC */
+	if (!ret)
+		ret = 1;
+
+	return ret;
+}
+
+/**
+ * ice_dcb_get_tc - Get the TC associated with the queue
+ * @vsi: ptr to the VSI
+ * @queue_index: queue number associated with VSI
+ */
+u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index)
+{
+	return vsi->tx_rings[queue_index]->dcb_tc;
+}
+
+/**
+ * ice_vsi_cfg_dcb_rings - Update rings to reflect DCB TC
+ * @vsi: VSI owner of rings being updated
+ */
+void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi)
+{
+	struct ice_ring *tx_ring, *rx_ring;
+	u16 qoffset, qcount;
+	int i, n;
+
+	if (!test_bit(ICE_FLAG_DCB_ENA, vsi->back->flags)) {
+		/* Reset the TC information */
+		for (i = 0; i < vsi->num_txq; i++) {
+			tx_ring = vsi->tx_rings[i];
+			tx_ring->dcb_tc = 0;
+		}
+		for (i = 0; i < vsi->num_rxq; i++) {
+			rx_ring = vsi->rx_rings[i];
+			rx_ring->dcb_tc = 0;
+		}
+		return;
+	}
+
+	ice_for_each_traffic_class(n) {
+		if (!(vsi->tc_cfg.ena_tc & BIT(n)))
+			break;
+
+		qoffset = vsi->tc_cfg.tc_info[n].qoffset;
+		qcount = vsi->tc_cfg.tc_info[n].qcount_tx;
+		for (i = qoffset; i < (qoffset + qcount); i++) {
+			tx_ring = vsi->tx_rings[i];
+			rx_ring = vsi->rx_rings[i];
+			tx_ring->dcb_tc = n;
+			rx_ring->dcb_tc = n;
+		}
+	}
+}
+
+/**
+ * ice_dcb_bwchk - check if ETS bandwidth input parameters are correct
+ * @pf: pointer to the PF struct
+ * @dcbcfg: pointer to DCB config structure
+ */
+int ice_dcb_bwchk(struct ice_pf *pf, struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *etscfg = &dcbcfg->etscfg;
+	u8 num_tc, total_bw = 0;
+	int i;
+
+	/* returns number of contigous TCs and 1 TC for non-contigous TCs,
+	 * since at least 1 TC has to be configured
+	 */
+	num_tc = ice_dcb_get_num_tc(dcbcfg);
+
+	/* no bandwidth checks required if there's only one TC, so assign
+	 * all bandwidth to TC0 and return
+	 */
+	if (num_tc == 1) {
+		etscfg->tcbwtable[0] = ICE_TC_MAX_BW;
+		return 0;
+	}
+
+	for (i = 0; i < num_tc; i++)
+		total_bw += etscfg->tcbwtable[i];
+
+	if (!total_bw) {
+		etscfg->tcbwtable[0] = ICE_TC_MAX_BW;
+	} else if (total_bw != ICE_TC_MAX_BW) {
+		dev_err(ice_pf_to_dev(pf), "Invalid config, total bandwidth must equal 100\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_pf_dcb_cfg - Apply new DCB configuration
+ * @pf: pointer to the PF struct
+ * @new_cfg: DCBX config to apply
+ * @locked: is the RTNL held
+ */
+int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
+{
+	struct ice_aqc_port_ets_elem buf = { 0 };
+	struct ice_dcbx_cfg *old_cfg, *curr_cfg;
+	struct device *dev = ice_pf_to_dev(pf);
+	int ret = ICE_DCB_NO_HW_CHG;
+	struct ice_vsi *pf_vsi;
+
+	curr_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+	/* FW does not care if change happened */
+	if (!pf->hw.port_info->qos_cfg.is_sw_lldp)
+		ret = ICE_DCB_HW_CHG_RST;
+
+	/* Enable DCB tagging only when more than one TC */
+	if (ice_dcb_get_num_tc(new_cfg) > 1) {
+		dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n");
+		set_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	} else {
+		dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n");
+		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	}
+
+	if (!memcmp(new_cfg, curr_cfg, sizeof(*new_cfg))) {
+		dev_dbg(dev, "No change in DCB config required\n");
+		return ret;
+	}
+
+	if (ice_dcb_bwchk(pf, new_cfg))
+		return -EINVAL;
+
+	/* Store old config in case FW config fails */
+	old_cfg = kmemdup(curr_cfg, sizeof(*old_cfg), GFP_KERNEL);
+	if (!old_cfg)
+		return -ENOMEM;
+
+	dev_info(dev, "Commit DCB Configuration to the hardware\n");
+	pf_vsi = ice_get_main_vsi(pf);
+	if (!pf_vsi) {
+		dev_dbg(dev, "PF VSI doesn't exist\n");
+		ret = -EINVAL;
+		goto free_cfg;
+	}
+
+	/* avoid race conditions by holding the lock while disabling and
+	 * re-enabling the VSI
+	 */
+	if (!locked)
+		rtnl_lock();
+	ice_dis_vsi(pf_vsi, true);
+
+	memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg));
+	memcpy(&curr_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
+	memcpy(&new_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
+
+	/* Only send new config to HW if we are in SW LLDP mode. Otherwise,
+	 * the new config came from the HW in the first place.
+	 */
+	if (pf->hw.port_info->qos_cfg.is_sw_lldp) {
+		ret = ice_set_dcb_cfg(pf->hw.port_info);
+		if (ret) {
+			dev_err(dev, "Set DCB Config failed\n");
+			/* Restore previous settings to local config */
+			memcpy(curr_cfg, old_cfg, sizeof(*curr_cfg));
+			goto out;
+		}
+	}
+
+	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+	if (ret) {
+		dev_err(dev, "Query Port ETS failed\n");
+		goto out;
+	}
+
+	ice_pf_dcb_recfg(pf);
+
+out:
+	ice_ena_vsi(pf_vsi, true);
+	if (!locked)
+		rtnl_unlock();
+free_cfg:
+	kfree(old_cfg);
+	return ret;
+}
+
+/**
+ * ice_cfg_etsrec_defaults - Set default ETS recommended DCB config
+ * @pi: port information structure
+ */
+static void ice_cfg_etsrec_defaults(struct ice_port_info *pi)
+{
+	struct ice_dcbx_cfg *dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
+	u8 i;
+
+	/* Ensure ETS recommended DCB configuration is not already set */
+	if (dcbcfg->etsrec.maxtcs)
+		return;
+
+	/* In CEE mode, set the default to 1 TC */
+	dcbcfg->etsrec.maxtcs = 1;
+	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+		dcbcfg->etsrec.tcbwtable[i] = i ? 0 : 100;
+		dcbcfg->etsrec.tsatable[i] = i ? ICE_IEEE_TSA_STRICT :
+						 ICE_IEEE_TSA_ETS;
+	}
+}
+
+/**
+ * ice_dcb_need_recfg - Check if DCB needs reconfig
+ * @pf: board private structure
+ * @old_cfg: current DCB config
+ * @new_cfg: new DCB config
+ */
+static bool
+ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+		   struct ice_dcbx_cfg *new_cfg)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	bool need_reconfig = false;
+
+	/* Check if ETS configuration has changed */
+	if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg,
+		   sizeof(new_cfg->etscfg))) {
+		/* If Priority Table has changed reconfig is needed */
+		if (memcmp(&new_cfg->etscfg.prio_table,
+			   &old_cfg->etscfg.prio_table,
+			   sizeof(new_cfg->etscfg.prio_table))) {
+			need_reconfig = true;
+			dev_dbg(dev, "ETS UP2TC changed.\n");
+		}
+
+		if (memcmp(&new_cfg->etscfg.tcbwtable,
+			   &old_cfg->etscfg.tcbwtable,
+			   sizeof(new_cfg->etscfg.tcbwtable)))
+			dev_dbg(dev, "ETS TC BW Table changed.\n");
+
+		if (memcmp(&new_cfg->etscfg.tsatable,
+			   &old_cfg->etscfg.tsatable,
+			   sizeof(new_cfg->etscfg.tsatable)))
+			dev_dbg(dev, "ETS TSA Table changed.\n");
+	}
+
+	/* Check if PFC configuration has changed */
+	if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) {
+		need_reconfig = true;
+		dev_dbg(dev, "PFC config change detected.\n");
+	}
+
+	/* Check if APP Table has changed */
+	if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app))) {
+		need_reconfig = true;
+		dev_dbg(dev, "APP Table change detected.\n");
+	}
+
+	dev_dbg(dev, "dcb need_reconfig=%d\n", need_reconfig);
+	return need_reconfig;
+}
+
+/**
+ * ice_dcb_rebuild - rebuild DCB post reset
+ * @pf: physical function instance
+ */
+void ice_dcb_rebuild(struct ice_pf *pf)
+{
+	struct ice_aqc_port_ets_elem buf = { 0 };
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_dcbx_cfg *err_cfg;
+	enum ice_status ret;
+
+	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+	if (ret) {
+		dev_err(dev, "Query Port ETS failed\n");
+		goto dcb_error;
+	}
+
+	mutex_lock(&pf->tc_mutex);
+
+	if (!pf->hw.port_info->qos_cfg.is_sw_lldp)
+		ice_cfg_etsrec_defaults(pf->hw.port_info);
+
+	ret = ice_set_dcb_cfg(pf->hw.port_info);
+	if (ret) {
+		dev_err(dev, "Failed to set DCB config in rebuild\n");
+		goto dcb_error;
+	}
+
+	if (!pf->hw.port_info->qos_cfg.is_sw_lldp) {
+		ret = ice_cfg_lldp_mib_change(&pf->hw, true);
+		if (ret && !pf->hw.port_info->qos_cfg.is_sw_lldp) {
+			dev_err(dev, "Failed to register for MIB changes\n");
+			goto dcb_error;
+		}
+	}
+
+	dev_info(dev, "DCB info restored\n");
+	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+	if (ret) {
+		dev_err(dev, "Query Port ETS failed\n");
+		goto dcb_error;
+	}
+
+	mutex_unlock(&pf->tc_mutex);
+
+	return;
+
+dcb_error:
+	dev_err(dev, "Disabling DCB until new settings occur\n");
+	err_cfg = kzalloc(sizeof(*err_cfg), GFP_KERNEL);
+	if (!err_cfg) {
+		mutex_unlock(&pf->tc_mutex);
+		return;
+	}
+
+	err_cfg->etscfg.willing = true;
+	err_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW;
+	err_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
+	memcpy(&err_cfg->etsrec, &err_cfg->etscfg, sizeof(err_cfg->etsrec));
+	/* Coverity warns the return code of ice_pf_dcb_cfg() is not checked
+	 * here as is done for other calls to that function. That check is
+	 * not necessary since this is in this function's error cleanup path.
+	 * Suppress the Coverity warning with the following comment...
+	 */
+	/* coverity[check_return] */
+	ice_pf_dcb_cfg(pf, err_cfg, false);
+	kfree(err_cfg);
+
+	mutex_unlock(&pf->tc_mutex);
+}
+
+/**
+ * ice_dcb_init_cfg - set the initial DCB config in SW
+ * @pf: PF to apply config to
+ * @locked: Is the RTNL held
+ */
+static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked)
+{
+	struct ice_dcbx_cfg *newcfg;
+	struct ice_port_info *pi;
+	int ret = 0;
+
+	pi = pf->hw.port_info;
+	newcfg = kmemdup(&pi->qos_cfg.local_dcbx_cfg, sizeof(*newcfg),
+			 GFP_KERNEL);
+	if (!newcfg)
+		return -ENOMEM;
+
+	memset(&pi->qos_cfg.local_dcbx_cfg, 0, sizeof(*newcfg));
+
+	dev_info(ice_pf_to_dev(pf), "Configuring initial DCB values\n");
+	if (ice_pf_dcb_cfg(pf, newcfg, locked))
+		ret = -EINVAL;
+
+	kfree(newcfg);
+
+	return ret;
+}
+
+/**
+ * ice_dcb_sw_dflt_cfg - Apply a default DCB config
+ * @pf: PF to apply config to
+ * @ets_willing: configure ETS willing
+ * @locked: was this function called with RTNL held
+ */
+static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked)
+{
+	struct ice_aqc_port_ets_elem buf = { 0 };
+	struct ice_dcbx_cfg *dcbcfg;
+	struct ice_port_info *pi;
+	struct ice_hw *hw;
+	int ret;
+
+	hw = &pf->hw;
+	pi = hw->port_info;
+	dcbcfg = kzalloc(sizeof(*dcbcfg), GFP_KERNEL);
+	if (!dcbcfg)
+		return -ENOMEM;
+
+	memset(&pi->qos_cfg.local_dcbx_cfg, 0, sizeof(*dcbcfg));
+
+	dcbcfg->etscfg.willing = ets_willing ? 1 : 0;
+	dcbcfg->etscfg.maxtcs = hw->func_caps.common_cap.maxtc;
+	dcbcfg->etscfg.tcbwtable[0] = 100;
+	dcbcfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
+
+	memcpy(&dcbcfg->etsrec, &dcbcfg->etscfg,
+	       sizeof(dcbcfg->etsrec));
+	dcbcfg->etsrec.willing = 0;
+
+	dcbcfg->pfc.willing = 1;
+	dcbcfg->pfc.pfccap = hw->func_caps.common_cap.maxtc;
+
+	dcbcfg->numapps = 1;
+	dcbcfg->app[0].selector = ICE_APP_SEL_ETHTYPE;
+	dcbcfg->app[0].priority = 3;
+	dcbcfg->app[0].prot_id = ICE_APP_PROT_ID_FCOE;
+
+	ret = ice_pf_dcb_cfg(pf, dcbcfg, locked);
+	kfree(dcbcfg);
+	if (ret)
+		return ret;
+
+	return ice_query_port_ets(pi, &buf, sizeof(buf), NULL);
+}
+
+/**
+ * ice_dcb_tc_contig - Check that TCs are contiguous
+ * @prio_table: pointer to priority table
+ *
+ * Check if TCs begin with TC0 and are contiguous
+ */
+static bool ice_dcb_tc_contig(u8 *prio_table)
+{
+	bool found_empty = false;
+	u8 used_tc = 0;
+	int i;
+
+	/* Create a bitmap of used TCs */
+	for (i = 0; i < CEE_DCBX_MAX_PRIO; i++)
+		used_tc |= BIT(prio_table[i]);
+
+	for (i = 0; i < CEE_DCBX_MAX_PRIO; i++) {
+		if (used_tc & BIT(i)) {
+			if (found_empty)
+				return false;
+		} else {
+			found_empty = true;
+		}
+	}
+
+	return true;
+}
+
+/**
+ * ice_dcb_noncontig_cfg - Configure DCB for non-contiguous TCs
+ * @pf: pointer to the PF struct
+ *
+ * If non-contiguous TCs, then configure SW DCB with TC0 and ETS non-willing
+ */
+static int ice_dcb_noncontig_cfg(struct ice_pf *pf)
+{
+	struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+	struct device *dev = ice_pf_to_dev(pf);
+	int ret;
+
+	/* Configure SW DCB default with ETS non-willing */
+	ret = ice_dcb_sw_dflt_cfg(pf, false, true);
+	if (ret) {
+		dev_err(dev, "Failed to set local DCB config %d\n", ret);
+		return ret;
+	}
+
+	/* Reconfigure with ETS willing so that FW will send LLDP MIB event */
+	dcbcfg->etscfg.willing = 1;
+	ret = ice_set_dcb_cfg(pf->hw.port_info);
+	if (ret)
+		dev_err(dev, "Failed to set DCB to unwilling\n");
+
+	return ret;
+}
+
+/**
+ * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs
+ * @pf: pointer to the PF struct
+ *
+ * Assumed caller has already disabled all VSIs before
+ * calling this function. Reconfiguring DCB based on
+ * local_dcbx_cfg.
+ */
+void ice_pf_dcb_recfg(struct ice_pf *pf)
+{
+	struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+	u8 tc_map = 0;
+	int v, ret;
+
+	/* Update each VSI */
+	ice_for_each_vsi(pf, v) {
+		struct ice_vsi *vsi = pf->vsi[v];
+
+		if (!vsi)
+			continue;
+
+		if (vsi->type == ICE_VSI_PF) {
+			tc_map = ice_dcb_get_ena_tc(dcbcfg);
+
+			/* If DCBX request non-contiguous TC, then configure
+			 * default TC
+			 */
+			if (!ice_dcb_tc_contig(dcbcfg->etscfg.prio_table)) {
+				tc_map = ICE_DFLT_TRAFFIC_CLASS;
+				ice_dcb_noncontig_cfg(pf);
+			}
+		} else {
+			tc_map = ICE_DFLT_TRAFFIC_CLASS;
+		}
+
+		ret = ice_vsi_cfg_tc(vsi, tc_map);
+		if (ret) {
+			dev_err(ice_pf_to_dev(pf), "Failed to config TC for VSI index: %d\n",
+				vsi->idx);
+			continue;
+		}
+
+		ice_vsi_map_rings_to_vectors(vsi);
+		if (vsi->type == ICE_VSI_PF)
+			ice_dcbnl_set_all(vsi);
+	}
+}
+
+/**
+ * ice_init_pf_dcb - initialize DCB for a PF
+ * @pf: PF to initialize DCB for
+ * @locked: Was function called with RTNL held
+ */
+int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_port_info *port_info;
+	struct ice_hw *hw = &pf->hw;
+	int err;
+
+	port_info = hw->port_info;
+
+	err = ice_init_dcb(hw, false);
+	if (err && !port_info->qos_cfg.is_sw_lldp) {
+		dev_err(dev, "Error initializing DCB %d\n", err);
+		goto dcb_init_err;
+	}
+
+	dev_info(dev, "DCB is enabled in the hardware, max number of TCs supported on this port are %d\n",
+		 pf->hw.func_caps.common_cap.maxtc);
+	if (err) {
+		struct ice_vsi *pf_vsi;
+
+		/* FW LLDP is disabled, activate SW DCBX/LLDP mode */
+		dev_info(dev, "FW LLDP is disabled, DCBx/LLDP in SW mode.\n");
+		clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
+		err = ice_dcb_sw_dflt_cfg(pf, true, locked);
+		if (err) {
+			dev_err(dev, "Failed to set local DCB config %d\n",
+				err);
+			err = -EIO;
+			goto dcb_init_err;
+		}
+
+		/* If the FW DCBX engine is not running then Rx LLDP packets
+		 * need to be redirected up the stack.
+		 */
+		pf_vsi = ice_get_main_vsi(pf);
+		if (!pf_vsi) {
+			dev_err(dev, "Failed to set local DCB config\n");
+			err = -EIO;
+			goto dcb_init_err;
+		}
+
+		ice_cfg_sw_lldp(pf_vsi, false, true);
+
+		pf->dcbx_cap = ice_dcb_get_mode(port_info, true);
+		return 0;
+	}
+
+	set_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
+
+	/* DCBX/LLDP enabled in FW, set DCBNL mode advertisement */
+	pf->dcbx_cap = ice_dcb_get_mode(port_info, false);
+
+	err = ice_dcb_init_cfg(pf, locked);
+	if (err)
+		goto dcb_init_err;
+
+	return err;
+
+dcb_init_err:
+	dev_err(dev, "DCB init failed\n");
+	return err;
+}
+
+/**
+ * ice_update_dcb_stats - Update DCB stats counters
+ * @pf: PF whose stats needs to be updated
+ */
+void ice_update_dcb_stats(struct ice_pf *pf)
+{
+	struct ice_hw_port_stats *prev_ps, *cur_ps;
+	struct ice_hw *hw = &pf->hw;
+	u8 port;
+	int i;
+
+	port = hw->port_info->lport;
+	prev_ps = &pf->stats_prev;
+	cur_ps = &pf->stats;
+
+	for (i = 0; i < 8; i++) {
+		ice_stat_update32(hw, GLPRT_PXOFFRXC(port, i),
+				  pf->stat_prev_loaded,
+				  &prev_ps->priority_xoff_rx[i],
+				  &cur_ps->priority_xoff_rx[i]);
+		ice_stat_update32(hw, GLPRT_PXONRXC(port, i),
+				  pf->stat_prev_loaded,
+				  &prev_ps->priority_xon_rx[i],
+				  &cur_ps->priority_xon_rx[i]);
+		ice_stat_update32(hw, GLPRT_PXONTXC(port, i),
+				  pf->stat_prev_loaded,
+				  &prev_ps->priority_xon_tx[i],
+				  &cur_ps->priority_xon_tx[i]);
+		ice_stat_update32(hw, GLPRT_PXOFFTXC(port, i),
+				  pf->stat_prev_loaded,
+				  &prev_ps->priority_xoff_tx[i],
+				  &cur_ps->priority_xoff_tx[i]);
+		ice_stat_update32(hw, GLPRT_RXON2OFFCNT(port, i),
+				  pf->stat_prev_loaded,
+				  &prev_ps->priority_xon_2_xoff[i],
+				  &cur_ps->priority_xon_2_xoff[i]);
+	}
+}
+
+/**
+ * ice_tx_prepare_vlan_flags_dcb - prepare VLAN tagging for DCB
+ * @tx_ring: ring to send buffer on
+ * @first: pointer to struct ice_tx_buf
+ *
+ * This should not be called if the outer VLAN is software offloaded as the VLAN
+ * tag will already be configured with the correct ID and priority bits
+ */
+void
+ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring,
+			      struct ice_tx_buf *first)
+{
+	struct sk_buff *skb = first->skb;
+
+	if (!test_bit(ICE_FLAG_DCB_ENA, tx_ring->vsi->back->flags))
+		return;
+
+	/* Insert 802.1p priority into VLAN header */
+	if ((first->tx_flags & ICE_TX_FLAGS_HW_VLAN) ||
+	    skb->priority != TC_PRIO_CONTROL) {
+		first->tx_flags &= ~ICE_TX_FLAGS_VLAN_PR_M;
+		/* Mask the lower 3 bits to set the 802.1p priority */
+		first->tx_flags |= (skb->priority & 0x7) <<
+				   ICE_TX_FLAGS_VLAN_PR_S;
+		/* if this is not already set it means a VLAN 0 + priority needs
+		 * to be offloaded
+		 */
+		first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
+	}
+}
+
+/**
+ * ice_dcb_process_lldp_set_mib_change - Process MIB change
+ * @pf: ptr to ice_pf
+ * @event: pointer to the admin queue receive event
+ */
+void
+ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
+				    struct ice_rq_event_info *event)
+{
+	struct ice_aqc_port_ets_elem buf = { 0 };
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_aqc_lldp_get_mib *mib;
+	struct ice_dcbx_cfg tmp_dcbx_cfg;
+	bool need_reconfig = false;
+	struct ice_port_info *pi;
+	struct ice_vsi *pf_vsi;
+	u8 mib_type;
+	int ret;
+
+	/* Not DCB capable or capability disabled */
+	if (!(test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)))
+		return;
+
+	if (pf->dcbx_cap & DCB_CAP_DCBX_HOST) {
+		dev_dbg(dev, "MIB Change Event in HOST mode\n");
+		return;
+	}
+
+	pi = pf->hw.port_info;
+	mib = (struct ice_aqc_lldp_get_mib *)&event->desc.params.raw;
+	/* Ignore if event is not for Nearest Bridge */
+	mib_type = ((mib->type >> ICE_AQ_LLDP_BRID_TYPE_S) &
+		    ICE_AQ_LLDP_BRID_TYPE_M);
+	dev_dbg(dev, "LLDP event MIB bridge type 0x%x\n", mib_type);
+	if (mib_type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID)
+		return;
+
+	/* Check MIB Type and return if event for Remote MIB update */
+	mib_type = mib->type & ICE_AQ_LLDP_MIB_TYPE_M;
+	dev_dbg(dev, "LLDP event mib type %s\n", mib_type ? "remote" : "local");
+	if (mib_type == ICE_AQ_LLDP_MIB_REMOTE) {
+		/* Update the remote cached instance and return */
+		ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
+					 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
+					 &pi->qos_cfg.remote_dcbx_cfg);
+		if (ret) {
+			dev_err(dev, "Failed to get remote DCB config\n");
+			return;
+		}
+	}
+
+	mutex_lock(&pf->tc_mutex);
+
+	/* store the old configuration */
+	tmp_dcbx_cfg = pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+	/* Reset the old DCBX configuration data */
+	memset(&pi->qos_cfg.local_dcbx_cfg, 0,
+	       sizeof(pi->qos_cfg.local_dcbx_cfg));
+
+	/* Get updated DCBX data from firmware */
+	ret = ice_get_dcb_cfg(pf->hw.port_info);
+	if (ret) {
+		dev_err(dev, "Failed to get DCB config\n");
+		goto out;
+	}
+
+	/* No change detected in DCBX configs */
+	if (!memcmp(&tmp_dcbx_cfg, &pi->qos_cfg.local_dcbx_cfg,
+		    sizeof(tmp_dcbx_cfg))) {
+		dev_dbg(dev, "No change detected in DCBX configuration.\n");
+		goto out;
+	}
+
+	pf->dcbx_cap = ice_dcb_get_mode(pi, false);
+
+	need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg,
+					   &pi->qos_cfg.local_dcbx_cfg);
+	ice_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &pi->qos_cfg.local_dcbx_cfg);
+	if (!need_reconfig)
+		goto out;
+
+	/* Enable DCB tagging only when more than one TC */
+	if (ice_dcb_get_num_tc(&pi->qos_cfg.local_dcbx_cfg) > 1) {
+		dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n");
+		set_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	} else {
+		dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n");
+		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	}
+
+	pf_vsi = ice_get_main_vsi(pf);
+	if (!pf_vsi) {
+		dev_dbg(dev, "PF VSI doesn't exist\n");
+		goto out;
+	}
+
+	rtnl_lock();
+	ice_dis_vsi(pf_vsi, true);
+
+	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+	if (ret) {
+		dev_err(dev, "Query Port ETS failed\n");
+		goto unlock_rtnl;
+	}
+
+	/* changes in configuration update VSI */
+	ice_pf_dcb_recfg(pf);
+
+	ice_ena_vsi(pf_vsi, true);
+unlock_rtnl:
+	rtnl_unlock();
+out:
+	mutex_unlock(&pf->tc_mutex);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
new file mode 100644
index 000000000..35c21d9ae
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DCB_LIB_H_
+#define _ICE_DCB_LIB_H_
+
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_lib.h"
+
+#ifdef CONFIG_DCB
+#define ICE_TC_MAX_BW		100 /* Default Max BW percentage */
+#define ICE_DCB_HW_CHG_RST	0 /* DCB configuration changed with reset */
+#define ICE_DCB_NO_HW_CHG	1 /* DCB configuration did not change */
+#define ICE_DCB_HW_CHG		2 /* DCB configuration changed, no reset */
+
+void ice_dcb_rebuild(struct ice_pf *pf);
+u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg);
+u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
+void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi);
+bool ice_is_pfc_causing_hung_q(struct ice_pf *pf, unsigned int txqueue);
+u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index);
+int
+ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked);
+int ice_dcb_bwchk(struct ice_pf *pf, struct ice_dcbx_cfg *dcbcfg);
+void ice_pf_dcb_recfg(struct ice_pf *pf);
+void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi);
+int ice_init_pf_dcb(struct ice_pf *pf, bool locked);
+void ice_update_dcb_stats(struct ice_pf *pf);
+void
+ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring,
+			      struct ice_tx_buf *first);
+void
+ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
+				    struct ice_rq_event_info *event);
+void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc);
+
+/**
+ * ice_find_q_in_range
+ * @low: start of queue range for a TC i.e. offset of TC
+ * @high: start of queue for next TC
+ * @tx_q: hung_queue/tx_queue
+ *
+ * finds if queue 'tx_q' falls between the two offsets of any given TC
+ */
+static inline bool ice_find_q_in_range(u16 low, u16 high, unsigned int tx_q)
+{
+	return (tx_q >= low) && (tx_q < high);
+}
+
+static inline void
+ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, struct ice_ring *ring)
+{
+	tlan_ctx->cgd_num = ring->dcb_tc;
+}
+
+static inline bool ice_is_dcb_active(struct ice_pf *pf)
+{
+	return (test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags) ||
+		test_bit(ICE_FLAG_DCB_ENA, pf->flags));
+}
+#else
+#define ice_dcb_rebuild(pf) do {} while (0)
+
+static inline u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
+{
+	return ICE_DFLT_TRAFFIC_CLASS;
+}
+
+static inline u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
+{
+	return 1;
+}
+
+static inline u8
+ice_dcb_get_tc(struct ice_vsi __always_unused *vsi,
+	       int __always_unused queue_index)
+{
+	return 0;
+}
+
+static inline int
+ice_init_pf_dcb(struct ice_pf *pf, bool __always_unused locked)
+{
+	dev_dbg(ice_pf_to_dev(pf), "DCB not supported\n");
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_pf_dcb_cfg(struct ice_pf __always_unused *pf,
+	       struct ice_dcbx_cfg __always_unused *new_cfg,
+	       bool __always_unused locked)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_tx_prepare_vlan_flags_dcb(struct ice_ring __always_unused *tx_ring,
+			      struct ice_tx_buf __always_unused *first)
+{
+	return 0;
+}
+
+static inline bool ice_is_dcb_active(struct ice_pf __always_unused *pf)
+{
+	return false;
+}
+
+static inline bool
+ice_is_pfc_causing_hung_q(struct ice_pf __always_unused *pf,
+			  unsigned int __always_unused txqueue)
+{
+	return false;
+}
+
+#define ice_update_dcb_stats(pf) do {} while (0)
+#define ice_pf_dcb_recfg(pf) do {} while (0)
+#define ice_vsi_cfg_dcb_rings(vsi) do {} while (0)
+#define ice_dcb_process_lldp_set_mib_change(pf, event) do {} while (0)
+#define ice_set_cgd_num(tlan_ctx, ring) do {} while (0)
+#define ice_vsi_cfg_netdev_tc(vsi, ena_tc) do {} while (0)
+#endif /* CONFIG_DCB */
+#endif /* _ICE_DCB_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
new file mode 100644
index 000000000..4180f1f35
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
@@ -0,0 +1,974 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_dcb.h"
+#include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
+#include <net/dcbnl.h>
+
+/**
+ * ice_dcbnl_devreset - perform enough of a ifdown/ifup to sync DCBNL info
+ * @netdev: device associated with interface that needs reset
+ */
+static void ice_dcbnl_devreset(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	while (ice_is_reset_in_progress(pf->state))
+		usleep_range(1000, 2000);
+
+	dev_close(netdev);
+	netdev_state_change(netdev);
+	dev_open(netdev, NULL);
+	netdev_state_change(netdev);
+}
+
+/**
+ * ice_dcbnl_getets - retrieve local ETS configuration
+ * @netdev: the relevant netdev
+ * @ets: struct to hold ETS configuration
+ */
+static int ice_dcbnl_getets(struct net_device *netdev, struct ieee_ets *ets)
+{
+	struct ice_dcbx_cfg *dcbxcfg;
+	struct ice_pf *pf;
+
+	pf = ice_netdev_to_pf(netdev);
+	dcbxcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+	ets->willing = dcbxcfg->etscfg.willing;
+	ets->ets_cap = dcbxcfg->etscfg.maxtcs;
+	ets->cbs = dcbxcfg->etscfg.cbs;
+	memcpy(ets->tc_tx_bw, dcbxcfg->etscfg.tcbwtable, sizeof(ets->tc_tx_bw));
+	memcpy(ets->tc_rx_bw, dcbxcfg->etscfg.tcbwtable, sizeof(ets->tc_rx_bw));
+	memcpy(ets->tc_tsa, dcbxcfg->etscfg.tsatable, sizeof(ets->tc_tsa));
+	memcpy(ets->prio_tc, dcbxcfg->etscfg.prio_table, sizeof(ets->prio_tc));
+	memcpy(ets->tc_reco_bw, dcbxcfg->etsrec.tcbwtable,
+	       sizeof(ets->tc_reco_bw));
+	memcpy(ets->tc_reco_tsa, dcbxcfg->etsrec.tsatable,
+	       sizeof(ets->tc_reco_tsa));
+	memcpy(ets->reco_prio_tc, dcbxcfg->etscfg.prio_table,
+	       sizeof(ets->reco_prio_tc));
+
+	return 0;
+}
+
+/**
+ * ice_dcbnl_setets - set IEEE ETS configuration
+ * @netdev: pointer to relevant netdev
+ * @ets: struct to hold ETS configuration
+ */
+static int ice_dcbnl_setets(struct net_device *netdev, struct ieee_ets *ets)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+	int bwcfg = 0, bwrec = 0;
+	int err, i, max_tc = 0;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+	mutex_lock(&pf->tc_mutex);
+
+	new_cfg->etscfg.willing = ets->willing;
+	new_cfg->etscfg.cbs = ets->cbs;
+	ice_for_each_traffic_class(i) {
+		new_cfg->etscfg.tcbwtable[i] = ets->tc_tx_bw[i];
+		bwcfg += ets->tc_tx_bw[i];
+		new_cfg->etscfg.tsatable[i] = ets->tc_tsa[i];
+		new_cfg->etscfg.prio_table[i] = ets->prio_tc[i];
+		if (ets->prio_tc[i] > max_tc)
+			max_tc = ets->prio_tc[i];
+		new_cfg->etsrec.tcbwtable[i] = ets->tc_reco_bw[i];
+		bwrec += ets->tc_reco_bw[i];
+		new_cfg->etsrec.tsatable[i] = ets->tc_reco_tsa[i];
+		new_cfg->etsrec.prio_table[i] = ets->reco_prio_tc[i];
+	}
+
+	if (ice_dcb_bwchk(pf, new_cfg)) {
+		err = -EINVAL;
+		goto ets_out;
+	}
+
+	max_tc = pf->hw.func_caps.common_cap.maxtc;
+
+	new_cfg->etscfg.maxtcs = max_tc;
+
+	if (!bwcfg)
+		new_cfg->etscfg.tcbwtable[0] = 100;
+
+	if (!bwrec)
+		new_cfg->etsrec.tcbwtable[0] = 100;
+
+	err = ice_pf_dcb_cfg(pf, new_cfg, true);
+	/* return of zero indicates new cfg applied */
+	if (err == ICE_DCB_HW_CHG_RST)
+		ice_dcbnl_devreset(netdev);
+	if (err == ICE_DCB_NO_HW_CHG)
+		err = ICE_DCB_HW_CHG_RST;
+
+ets_out:
+	mutex_unlock(&pf->tc_mutex);
+	return err;
+}
+
+/**
+ * ice_dcbnl_getnumtcs - Get max number of traffic classes supported
+ * @dev: pointer to netdev struct
+ * @tcid: TC ID
+ * @num: total number of TCs supported by the adapter
+ *
+ * Return the total number of TCs supported
+ */
+static int
+ice_dcbnl_getnumtcs(struct net_device *dev, int __always_unused tcid, u8 *num)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(dev);
+
+	if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
+		return -EINVAL;
+
+	*num = pf->hw.func_caps.common_cap.maxtc;
+	return 0;
+}
+
+/**
+ * ice_dcbnl_getdcbx - retrieve current DCBX capability
+ * @netdev: pointer to the netdev struct
+ */
+static u8 ice_dcbnl_getdcbx(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	return pf->dcbx_cap;
+}
+
+/**
+ * ice_dcbnl_setdcbx - set required DCBX capability
+ * @netdev: the corresponding netdev
+ * @mode: required mode
+ */
+static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_qos_cfg *qos_cfg;
+
+	/* if FW LLDP agent is running, DCBNL not allowed to change mode */
+	if (test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
+		return ICE_DCB_NO_HW_CHG;
+
+	/* No support for LLD_MANAGED modes or CEE+IEEE */
+	if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    ((mode & DCB_CAP_DCBX_VER_IEEE) && (mode & DCB_CAP_DCBX_VER_CEE)) ||
+	    !(mode & DCB_CAP_DCBX_HOST))
+		return ICE_DCB_NO_HW_CHG;
+
+	/* Already set to the given mode no change */
+	if (mode == pf->dcbx_cap)
+		return ICE_DCB_NO_HW_CHG;
+
+	pf->dcbx_cap = mode;
+	qos_cfg = &pf->hw.port_info->qos_cfg;
+	if (mode & DCB_CAP_DCBX_VER_CEE)
+		qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_CEE;
+	else
+		qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_IEEE;
+
+	dev_info(ice_pf_to_dev(pf), "DCBx mode = 0x%x\n", mode);
+	return ICE_DCB_HW_CHG_RST;
+}
+
+/**
+ * ice_dcbnl_get_perm_hw_addr - MAC address used by DCBX
+ * @netdev: pointer to netdev struct
+ * @perm_addr: buffer to return permanent MAC address
+ */
+static void ice_dcbnl_get_perm_hw_addr(struct net_device *netdev, u8 *perm_addr)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+	int i, j;
+
+	memset(perm_addr, 0xff, MAX_ADDR_LEN);
+
+	for (i = 0; i < netdev->addr_len; i++)
+		perm_addr[i] = pi->mac.perm_addr[i];
+
+	for (j = 0; j < netdev->addr_len; j++, i++)
+		perm_addr[i] = pi->mac.perm_addr[j];
+}
+
+/**
+ * ice_get_pfc_delay - Retrieve PFC Link Delay
+ * @hw: pointer to HW struct
+ * @delay: holds the PFC Link Delay value
+ */
+static void ice_get_pfc_delay(struct ice_hw *hw, u16 *delay)
+{
+	u32 val;
+
+	val = rd32(hw, PRTDCB_GENC);
+	*delay = (u16)((val & PRTDCB_GENC_PFCLDA_M) >> PRTDCB_GENC_PFCLDA_S);
+}
+
+/**
+ * ice_dcbnl_getpfc - retrieve local IEEE PFC config
+ * @netdev: pointer to netdev struct
+ * @pfc: struct to hold PFC info
+ */
+static int ice_dcbnl_getpfc(struct net_device *netdev, struct ieee_pfc *pfc)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+	struct ice_dcbx_cfg *dcbxcfg;
+	int i;
+
+	dcbxcfg = &pi->qos_cfg.local_dcbx_cfg;
+	pfc->pfc_cap = dcbxcfg->pfc.pfccap;
+	pfc->pfc_en = dcbxcfg->pfc.pfcena;
+	pfc->mbc = dcbxcfg->pfc.mbc;
+	ice_get_pfc_delay(&pf->hw, &pfc->delay);
+
+	ice_for_each_traffic_class(i) {
+		pfc->requests[i] = pf->stats.priority_xoff_tx[i];
+		pfc->indications[i] = pf->stats.priority_xoff_rx[i];
+	}
+
+	return 0;
+}
+
+/**
+ * ice_dcbnl_setpfc - set local IEEE PFC config
+ * @netdev: pointer to relevant netdev
+ * @pfc: pointer to struct holding PFC config
+ */
+static int ice_dcbnl_setpfc(struct net_device *netdev, struct ieee_pfc *pfc)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+	int err;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	mutex_lock(&pf->tc_mutex);
+
+	new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+	if (pfc->pfc_cap)
+		new_cfg->pfc.pfccap = pfc->pfc_cap;
+	else
+		new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc;
+
+	new_cfg->pfc.pfcena = pfc->pfc_en;
+
+	err = ice_pf_dcb_cfg(pf, new_cfg, true);
+	if (err == ICE_DCB_HW_CHG_RST)
+		ice_dcbnl_devreset(netdev);
+	if (err == ICE_DCB_NO_HW_CHG)
+		err = ICE_DCB_HW_CHG_RST;
+	mutex_unlock(&pf->tc_mutex);
+	return err;
+}
+
+/**
+ * ice_dcbnl_get_pfc_cfg - Get CEE PFC config
+ * @netdev: pointer to netdev struct
+ * @prio: corresponding user priority
+ * @setting: the PFC setting for given priority
+ */
+static void
+ice_dcbnl_get_pfc_cfg(struct net_device *netdev, int prio, u8 *setting)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (prio >= ICE_MAX_USER_PRIORITY)
+		return;
+
+	*setting = (pi->qos_cfg.local_dcbx_cfg.pfc.pfcena >> prio) & 0x1;
+	dev_dbg(ice_pf_to_dev(pf), "Get PFC Config up=%d, setting=%d, pfcenable=0x%x\n",
+		prio, *setting, pi->qos_cfg.local_dcbx_cfg.pfc.pfcena);
+}
+
+/**
+ * ice_dcbnl_set_pfc_cfg - Set CEE PFC config
+ * @netdev: the corresponding netdev
+ * @prio: User Priority
+ * @set: PFC setting to apply
+ */
+static void ice_dcbnl_set_pfc_cfg(struct net_device *netdev, int prio, u8 set)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (prio >= ICE_MAX_USER_PRIORITY)
+		return;
+
+	new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+	new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc;
+	if (set)
+		new_cfg->pfc.pfcena |= BIT(prio);
+	else
+		new_cfg->pfc.pfcena &= ~BIT(prio);
+
+	dev_dbg(ice_pf_to_dev(pf), "Set PFC config UP:%d set:%d pfcena:0x%x\n",
+		prio, set, new_cfg->pfc.pfcena);
+}
+
+/**
+ * ice_dcbnl_getpfcstate - get CEE PFC mode
+ * @netdev: pointer to netdev struct
+ */
+static u8 ice_dcbnl_getpfcstate(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	/* Return enabled if any UP enabled for PFC */
+	if (pi->qos_cfg.local_dcbx_cfg.pfc.pfcena)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * ice_dcbnl_getstate - get DCB enabled state
+ * @netdev: pointer to netdev struct
+ */
+static u8 ice_dcbnl_getstate(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	u8 state = 0;
+
+	state = test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+
+	dev_dbg(ice_pf_to_dev(pf), "DCB enabled state = %d\n", state);
+	return state;
+}
+
+/**
+ * ice_dcbnl_setstate - Set CEE DCB state
+ * @netdev: pointer to relevant netdev
+ * @state: state value to set
+ */
+static u8 ice_dcbnl_setstate(struct net_device *netdev, u8 state)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return ICE_DCB_NO_HW_CHG;
+
+	/* Nothing to do */
+	if (!!state == test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+		return ICE_DCB_NO_HW_CHG;
+
+	if (state) {
+		set_bit(ICE_FLAG_DCB_ENA, pf->flags);
+		memcpy(&pf->hw.port_info->qos_cfg.desired_dcbx_cfg,
+		       &pf->hw.port_info->qos_cfg.local_dcbx_cfg,
+		       sizeof(struct ice_dcbx_cfg));
+	} else {
+		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	}
+
+	return ICE_DCB_HW_CHG;
+}
+
+/**
+ * ice_dcbnl_get_pg_tc_cfg_tx - get CEE PG Tx config
+ * @netdev: pointer to netdev struct
+ * @prio: the corresponding user priority
+ * @prio_type: traffic priority type
+ * @pgid: the BW group ID the traffic class belongs to
+ * @bw_pct: BW percentage for the corresponding BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int prio,
+			   u8 __always_unused *prio_type, u8 *pgid,
+			   u8 __always_unused *bw_pct,
+			   u8 __always_unused *up_map)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (prio >= ICE_MAX_USER_PRIORITY)
+		return;
+
+	*pgid = pi->qos_cfg.local_dcbx_cfg.etscfg.prio_table[prio];
+	dev_dbg(ice_pf_to_dev(pf), "Get PG config prio=%d tc=%d\n", prio,
+		*pgid);
+}
+
+/**
+ * ice_dcbnl_set_pg_tc_cfg_tx - set CEE PG Tx config
+ * @netdev: pointer to relevant netdev
+ * @tc: the corresponding traffic class
+ * @prio_type: the traffic priority type
+ * @bwg_id: the BW group ID the TC belongs to
+ * @bw_pct: the BW perventage for the BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
+			   u8 __always_unused prio_type,
+			   u8 __always_unused bwg_id,
+			   u8 __always_unused bw_pct, u8 up_map)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+	int i;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (tc >= ICE_MAX_TRAFFIC_CLASS)
+		return;
+
+	new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+	/* prio_type, bwg_id and bw_pct per UP are not supported */
+
+	ice_for_each_traffic_class(i) {
+		if (up_map & BIT(i))
+			new_cfg->etscfg.prio_table[i] = tc;
+	}
+	new_cfg->etscfg.tsatable[tc] = ICE_IEEE_TSA_ETS;
+}
+
+/**
+ * ice_dcbnl_get_pg_bwg_cfg_tx - Get CEE PGBW config
+ * @netdev: pointer to the netdev struct
+ * @pgid: corresponding traffic class
+ * @bw_pct: the BW percentage for the corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 *bw_pct)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (pgid >= ICE_MAX_TRAFFIC_CLASS)
+		return;
+
+	*bw_pct = pi->qos_cfg.local_dcbx_cfg.etscfg.tcbwtable[pgid];
+	dev_dbg(ice_pf_to_dev(pf), "Get PG BW config tc=%d bw_pct=%d\n",
+		pgid, *bw_pct);
+}
+
+/**
+ * ice_dcbnl_set_pg_bwg_cfg_tx - set CEE PG Tx BW config
+ * @netdev: the corresponding netdev
+ * @pgid: Correspongind traffic class
+ * @bw_pct: the BW percentage for the specified TC
+ */
+static void
+ice_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 bw_pct)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (pgid >= ICE_MAX_TRAFFIC_CLASS)
+		return;
+
+	new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+	new_cfg->etscfg.tcbwtable[pgid] = bw_pct;
+}
+
+/**
+ * ice_dcbnl_get_pg_tc_cfg_rx - Get CEE PG Rx config
+ * @netdev: pointer to netdev struct
+ * @prio: the corresponding user priority
+ * @prio_type: the traffic priority type
+ * @pgid: the PG ID
+ * @bw_pct: the BW percentage for the corresponding BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int prio,
+			   u8 __always_unused *prio_type, u8 *pgid,
+			   u8 __always_unused *bw_pct,
+			   u8 __always_unused *up_map)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (prio >= ICE_MAX_USER_PRIORITY)
+		return;
+
+	*pgid = pi->qos_cfg.local_dcbx_cfg.etscfg.prio_table[prio];
+}
+
+/**
+ * ice_dcbnl_set_pg_tc_cfg_rx
+ * @netdev: relevant netdev struct
+ * @prio: corresponding user priority
+ * @prio_type: the traffic priority type
+ * @pgid: the PG ID
+ * @bw_pct: BW percentage for corresponding BWG
+ * @up_map: prio mapped to corresponding TC
+ *
+ * lldpad requires this function pointer to be non-NULL to complete CEE config.
+ */
+static void
+ice_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev,
+			   int __always_unused prio,
+			   u8 __always_unused prio_type,
+			   u8 __always_unused pgid,
+			   u8 __always_unused bw_pct,
+			   u8 __always_unused up_map)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	dev_dbg(ice_pf_to_dev(pf), "Rx TC PG Config Not Supported.\n");
+}
+
+/**
+ * ice_dcbnl_get_pg_bwg_cfg_rx - Get CEE PG BW Rx config
+ * @netdev: pointer to netdev struct
+ * @pgid: the corresponding traffic class
+ * @bw_pct: the BW percentage for the corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int __always_unused pgid,
+			    u8 *bw_pct)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	*bw_pct = 0;
+}
+
+/**
+ * ice_dcbnl_set_pg_bwg_cfg_rx
+ * @netdev: the corresponding netdev
+ * @pgid: corresponding TC
+ * @bw_pct: BW percentage for given TC
+ *
+ * lldpad requires this function pointer to be non-NULL to complete CEE config.
+ */
+static void
+ice_dcbnl_set_pg_bwg_cfg_rx(struct net_device *netdev, int __always_unused pgid,
+			    u8 __always_unused bw_pct)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	dev_dbg(ice_pf_to_dev(pf), "Rx BWG PG Config Not Supported.\n");
+}
+
+/**
+ * ice_dcbnl_get_cap - Get DCBX capabilities of adapter
+ * @netdev: pointer to netdev struct
+ * @capid: the capability type
+ * @cap: the capability value
+ */
+static u8 ice_dcbnl_get_cap(struct net_device *netdev, int capid, u8 *cap)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	if (!(test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)))
+		return ICE_DCB_NO_HW_CHG;
+
+	switch (capid) {
+	case DCB_CAP_ATTR_PG:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_PFC:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_UP2TC:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_PG_TCS:
+		*cap = 0x80;
+		break;
+	case DCB_CAP_ATTR_PFC_TCS:
+		*cap = 0x80;
+		break;
+	case DCB_CAP_ATTR_GSP:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_BCN:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_DCBX:
+		*cap = pf->dcbx_cap;
+		break;
+	default:
+		*cap = false;
+		break;
+	}
+
+	dev_dbg(ice_pf_to_dev(pf), "DCBX Get Capability cap=%d capval=0x%x\n",
+		capid, *cap);
+	return 0;
+}
+
+/**
+ * ice_dcbnl_getapp - get CEE APP
+ * @netdev: pointer to netdev struct
+ * @idtype: the App selector
+ * @id: the App ethtype or port number
+ */
+static int ice_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct dcb_app app = {
+				.selector = idtype,
+				.protocol = id,
+			     };
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return -EINVAL;
+
+	return dcb_getapp(netdev, &app);
+}
+
+/**
+ * ice_dcbnl_find_app - Search for APP in given DCB config
+ * @cfg: struct to hold DCBX config
+ * @app: struct to hold app data to look for
+ */
+static bool
+ice_dcbnl_find_app(struct ice_dcbx_cfg *cfg,
+		   struct ice_dcb_app_priority_table *app)
+{
+	unsigned int i;
+
+	for (i = 0; i < cfg->numapps; i++) {
+		if (app->selector == cfg->app[i].selector &&
+		    app->prot_id == cfg->app[i].prot_id &&
+		    app->priority == cfg->app[i].priority)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_dcbnl_setapp - set local IEEE App config
+ * @netdev: relevant netdev struct
+ * @app: struct to hold app config info
+ */
+static int ice_dcbnl_setapp(struct net_device *netdev, struct dcb_app *app)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcb_app_priority_table new_app;
+	struct ice_dcbx_cfg *old_cfg, *new_cfg;
+	int ret;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	mutex_lock(&pf->tc_mutex);
+
+	new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+	old_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+	if (old_cfg->numapps == ICE_DCBX_MAX_APPS) {
+		ret = -EINVAL;
+		goto setapp_out;
+	}
+
+	ret = dcb_ieee_setapp(netdev, app);
+	if (ret)
+		goto setapp_out;
+
+	new_app.selector = app->selector;
+	new_app.prot_id = app->protocol;
+	new_app.priority = app->priority;
+	if (ice_dcbnl_find_app(old_cfg, &new_app)) {
+		ret = 0;
+		goto setapp_out;
+	}
+
+	new_cfg->app[new_cfg->numapps++] = new_app;
+	ret = ice_pf_dcb_cfg(pf, new_cfg, true);
+	/* return of zero indicates new cfg applied */
+	if (ret == ICE_DCB_HW_CHG_RST)
+		ice_dcbnl_devreset(netdev);
+	if (ret == ICE_DCB_NO_HW_CHG)
+		ret = ICE_DCB_HW_CHG_RST;
+
+setapp_out:
+	mutex_unlock(&pf->tc_mutex);
+	return ret;
+}
+
+/**
+ * ice_dcbnl_delapp - Delete local IEEE App config
+ * @netdev: relevant netdev
+ * @app: struct to hold app too delete
+ *
+ * Will not delete first application required by the FW
+ */
+static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *old_cfg, *new_cfg;
+	unsigned int i, j;
+	int ret = 0;
+
+	if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)
+		return -EINVAL;
+
+	mutex_lock(&pf->tc_mutex);
+	old_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+	if (old_cfg->numapps <= 1)
+		goto delapp_out;
+
+	ret = dcb_ieee_delapp(netdev, app);
+	if (ret)
+		goto delapp_out;
+
+	new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+	for (i = 1; i < new_cfg->numapps; i++) {
+		if (app->selector == new_cfg->app[i].selector &&
+		    app->protocol == new_cfg->app[i].prot_id &&
+		    app->priority == new_cfg->app[i].priority) {
+			new_cfg->app[i].selector = 0;
+			new_cfg->app[i].prot_id = 0;
+			new_cfg->app[i].priority = 0;
+			break;
+		}
+	}
+
+	/* Did not find DCB App */
+	if (i == new_cfg->numapps) {
+		ret = -EINVAL;
+		goto delapp_out;
+	}
+
+	new_cfg->numapps--;
+
+	for (j = i; j < new_cfg->numapps; j++) {
+		new_cfg->app[i].selector = old_cfg->app[j + 1].selector;
+		new_cfg->app[i].prot_id = old_cfg->app[j + 1].prot_id;
+		new_cfg->app[i].priority = old_cfg->app[j + 1].priority;
+	}
+
+	ret = ice_pf_dcb_cfg(pf, new_cfg, true);
+	/* return of zero indicates new cfg applied */
+	if (ret == ICE_DCB_HW_CHG_RST)
+		ice_dcbnl_devreset(netdev);
+	if (ret == ICE_DCB_NO_HW_CHG)
+		ret = ICE_DCB_HW_CHG_RST;
+
+delapp_out:
+	mutex_unlock(&pf->tc_mutex);
+	return ret;
+}
+
+/**
+ * ice_dcbnl_cee_set_all - Commit CEE DCB settings to HW
+ * @netdev: the corresponding netdev
+ */
+static u8 ice_dcbnl_cee_set_all(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+	int err;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return ICE_DCB_NO_HW_CHG;
+
+	new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+	mutex_lock(&pf->tc_mutex);
+
+	err = ice_pf_dcb_cfg(pf, new_cfg, true);
+
+	mutex_unlock(&pf->tc_mutex);
+	return (err != ICE_DCB_HW_CHG_RST) ? ICE_DCB_NO_HW_CHG : err;
+}
+
+static const struct dcbnl_rtnl_ops dcbnl_ops = {
+	/* IEEE 802.1Qaz std */
+	.ieee_getets = ice_dcbnl_getets,
+	.ieee_setets = ice_dcbnl_setets,
+	.ieee_getpfc = ice_dcbnl_getpfc,
+	.ieee_setpfc = ice_dcbnl_setpfc,
+	.ieee_setapp = ice_dcbnl_setapp,
+	.ieee_delapp = ice_dcbnl_delapp,
+
+	/* CEE std */
+	.getstate = ice_dcbnl_getstate,
+	.setstate = ice_dcbnl_setstate,
+	.getpermhwaddr = ice_dcbnl_get_perm_hw_addr,
+	.setpgtccfgtx = ice_dcbnl_set_pg_tc_cfg_tx,
+	.setpgbwgcfgtx = ice_dcbnl_set_pg_bwg_cfg_tx,
+	.setpgtccfgrx = ice_dcbnl_set_pg_tc_cfg_rx,
+	.setpgbwgcfgrx = ice_dcbnl_set_pg_bwg_cfg_rx,
+	.getpgtccfgtx = ice_dcbnl_get_pg_tc_cfg_tx,
+	.getpgbwgcfgtx = ice_dcbnl_get_pg_bwg_cfg_tx,
+	.getpgtccfgrx = ice_dcbnl_get_pg_tc_cfg_rx,
+	.getpgbwgcfgrx = ice_dcbnl_get_pg_bwg_cfg_rx,
+	.setpfccfg = ice_dcbnl_set_pfc_cfg,
+	.getpfccfg = ice_dcbnl_get_pfc_cfg,
+	.setall = ice_dcbnl_cee_set_all,
+	.getcap = ice_dcbnl_get_cap,
+	.getnumtcs = ice_dcbnl_getnumtcs,
+	.getpfcstate = ice_dcbnl_getpfcstate,
+	.getapp = ice_dcbnl_getapp,
+
+	/* DCBX configuration */
+	.getdcbx = ice_dcbnl_getdcbx,
+	.setdcbx = ice_dcbnl_setdcbx,
+};
+
+/**
+ * ice_dcbnl_set_all - set all the apps and ieee data from DCBX config
+ * @vsi: pointer to VSI struct
+ */
+void ice_dcbnl_set_all(struct ice_vsi *vsi)
+{
+	struct net_device *netdev = vsi->netdev;
+	struct ice_dcbx_cfg *dcbxcfg;
+	struct ice_port_info *pi;
+	struct dcb_app sapp;
+	struct ice_pf *pf;
+	unsigned int i;
+
+	if (!netdev)
+		return;
+
+	pf = ice_netdev_to_pf(netdev);
+	pi = pf->hw.port_info;
+
+	/* SW DCB taken care of by SW Default Config */
+	if (pf->dcbx_cap & DCB_CAP_DCBX_HOST)
+		return;
+
+	/* DCB not enabled */
+	if (!test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+		return;
+
+	dcbxcfg = &pi->qos_cfg.local_dcbx_cfg;
+
+	for (i = 0; i < dcbxcfg->numapps; i++) {
+		u8 prio, tc_map;
+
+		prio = dcbxcfg->app[i].priority;
+		tc_map = BIT(dcbxcfg->etscfg.prio_table[prio]);
+
+		/* Add APP only if the TC is enabled for this VSI */
+		if (tc_map & vsi->tc_cfg.ena_tc) {
+			sapp.selector = dcbxcfg->app[i].selector;
+			sapp.protocol = dcbxcfg->app[i].prot_id;
+			sapp.priority = prio;
+			dcb_ieee_setapp(netdev, &sapp);
+		}
+	}
+	/* Notify user-space of the changes */
+	dcbnl_ieee_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_SET, 0, 0);
+}
+
+/**
+ * ice_dcbnl_vsi_del_app - Delete APP on all VSIs
+ * @vsi: pointer to the main VSI
+ * @app: APP to delete
+ *
+ * Delete given APP from all the VSIs for given PF
+ */
+static void
+ice_dcbnl_vsi_del_app(struct ice_vsi *vsi,
+		      struct ice_dcb_app_priority_table *app)
+{
+	struct dcb_app sapp;
+	int err;
+
+	sapp.selector = app->selector;
+	sapp.protocol = app->prot_id;
+	sapp.priority = app->priority;
+	err = ice_dcbnl_delapp(vsi->netdev, &sapp);
+	dev_dbg(ice_pf_to_dev(vsi->back), "Deleting app for VSI idx=%d err=%d sel=%d proto=0x%x, prio=%d\n",
+		vsi->idx, err, app->selector, app->prot_id, app->priority);
+}
+
+/**
+ * ice_dcbnl_flush_apps - Delete all removed APPs
+ * @pf: the corresponding PF
+ * @old_cfg: old DCBX configuration data
+ * @new_cfg: new DCBX configuration data
+ *
+ * Find and delete all APPS that are not present in the passed
+ * DCB configuration
+ */
+void
+ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+		     struct ice_dcbx_cfg *new_cfg)
+{
+	struct ice_vsi *main_vsi = ice_get_main_vsi(pf);
+	unsigned int i;
+
+	if (!main_vsi)
+		return;
+
+	for (i = 0; i < old_cfg->numapps; i++) {
+		struct ice_dcb_app_priority_table app = old_cfg->app[i];
+
+		/* The APP is not available anymore delete it */
+		if (!ice_dcbnl_find_app(new_cfg, &app))
+			ice_dcbnl_vsi_del_app(main_vsi, &app);
+	}
+}
+
+/**
+ * ice_dcbnl_setup - setup DCBNL
+ * @vsi: VSI to get associated netdev from
+ */
+void ice_dcbnl_setup(struct ice_vsi *vsi)
+{
+	struct net_device *netdev = vsi->netdev;
+	struct ice_pf *pf;
+
+	pf = ice_netdev_to_pf(netdev);
+	if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
+		return;
+
+	netdev->dcbnl_ops = &dcbnl_ops;
+	ice_dcbnl_set_all(vsi);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.h b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h
new file mode 100644
index 000000000..6c630a362
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DCB_NL_H_
+#define _ICE_DCB_NL_H_
+
+#ifdef CONFIG_DCB
+void ice_dcbnl_setup(struct ice_vsi *vsi);
+void ice_dcbnl_set_all(struct ice_vsi *vsi);
+void
+ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+		     struct ice_dcbx_cfg *new_cfg);
+#else
+#define ice_dcbnl_setup(vsi) do {} while (0)
+#define ice_dcbnl_set_all(vsi) do {} while (0)
+#define ice_dcbnl_flush_apps(pf, old_cfg, new_cfg) do {} while (0)
+#endif /* CONFIG_DCB */
+
+#endif /* _ICE_DCB_NL_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h
new file mode 100644
index 000000000..ef4392e6e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_devids.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_DEVIDS_H_
+#define _ICE_DEVIDS_H_
+
+/* Device IDs */
+/* Intel(R) Ethernet Connection E823-L for backplane */
+#define ICE_DEV_ID_E823L_BACKPLANE	0x124C
+/* Intel(R) Ethernet Connection E823-L for SFP */
+#define ICE_DEV_ID_E823L_SFP		0x124D
+/* Intel(R) Ethernet Connection E823-L/X557-AT 10GBASE-T */
+#define ICE_DEV_ID_E823L_10G_BASE_T	0x124E
+/* Intel(R) Ethernet Connection E823-L 1GbE */
+#define ICE_DEV_ID_E823L_1GBE		0x124F
+/* Intel(R) Ethernet Connection E823-L for QSFP */
+#define ICE_DEV_ID_E823L_QSFP		0x151D
+/* Intel(R) Ethernet Controller E810-C for backplane */
+#define ICE_DEV_ID_E810C_BACKPLANE	0x1591
+/* Intel(R) Ethernet Controller E810-C for QSFP */
+#define ICE_DEV_ID_E810C_QSFP		0x1592
+/* Intel(R) Ethernet Controller E810-C for SFP */
+#define ICE_DEV_ID_E810C_SFP		0x1593
+/* Intel(R) Ethernet Controller E810-XXV for backplane */
+#define ICE_DEV_ID_E810_XXV_BACKPLANE	0x1599
+/* Intel(R) Ethernet Controller E810-XXV for QSFP */
+#define ICE_DEV_ID_E810_XXV_QSFP	0x159A
+/* Intel(R) Ethernet Controller E810-XXV for SFP */
+#define ICE_DEV_ID_E810_XXV_SFP		0x159B
+/* Intel(R) Ethernet Connection E823-C for backplane */
+#define ICE_DEV_ID_E823C_BACKPLANE	0x188A
+/* Intel(R) Ethernet Connection E823-C for QSFP */
+#define ICE_DEV_ID_E823C_QSFP		0x188B
+/* Intel(R) Ethernet Connection E823-C for SFP */
+#define ICE_DEV_ID_E823C_SFP		0x188C
+/* Intel(R) Ethernet Connection E823-C/X557-AT 10GBASE-T */
+#define ICE_DEV_ID_E823C_10G_BASE_T	0x188D
+/* Intel(R) Ethernet Connection E823-C 1GbE */
+#define ICE_DEV_ID_E823C_SGMII		0x188E
+/* Intel(R) Ethernet Connection E822-C for backplane */
+#define ICE_DEV_ID_E822C_BACKPLANE	0x1890
+/* Intel(R) Ethernet Connection E822-C for QSFP */
+#define ICE_DEV_ID_E822C_QSFP		0x1891
+/* Intel(R) Ethernet Connection E822-C for SFP */
+#define ICE_DEV_ID_E822C_SFP		0x1892
+/* Intel(R) Ethernet Connection E822-C/X557-AT 10GBASE-T */
+#define ICE_DEV_ID_E822C_10G_BASE_T	0x1893
+/* Intel(R) Ethernet Connection E822-C 1GbE */
+#define ICE_DEV_ID_E822C_SGMII		0x1894
+/* Intel(R) Ethernet Connection E822-L for backplane */
+#define ICE_DEV_ID_E822L_BACKPLANE	0x1897
+/* Intel(R) Ethernet Connection E822-L for SFP */
+#define ICE_DEV_ID_E822L_SFP		0x1898
+/* Intel(R) Ethernet Connection E822-L/X557-AT 10GBASE-T */
+#define ICE_DEV_ID_E822L_10G_BASE_T	0x1899
+/* Intel(R) Ethernet Connection E822-L 1GbE */
+#define ICE_DEV_ID_E822L_SGMII		0x189A
+
+#endif /* _ICE_DEVIDS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c
new file mode 100644
index 000000000..f18ce43b7
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.c
@@ -0,0 +1,584 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_devlink.h"
+#include "ice_fw_update.h"
+
+static void ice_info_get_dsn(struct ice_pf *pf, char *buf, size_t len)
+{
+	u8 dsn[8];
+
+	/* Copy the DSN into an array in Big Endian format */
+	put_unaligned_be64(pci_get_dsn(pf->pdev), dsn);
+
+	snprintf(buf, len, "%8phD", dsn);
+}
+
+static int ice_info_pba(struct ice_pf *pf, char *buf, size_t len)
+{
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+
+	status = ice_read_pba_string(hw, (u8 *)buf, len);
+	if (status)
+		/* We failed to locate the PBA, so just skip this entry */
+		dev_dbg(ice_pf_to_dev(pf), "Failed to read Product Board Assembly string, status %s\n",
+			ice_stat_str(status));
+
+	return 0;
+}
+
+static int ice_info_fw_mgmt(struct ice_pf *pf, char *buf, size_t len)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	snprintf(buf, len, "%u.%u.%u", hw->fw_maj_ver, hw->fw_min_ver,
+		 hw->fw_patch);
+
+	return 0;
+}
+
+static int ice_info_fw_api(struct ice_pf *pf, char *buf, size_t len)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	snprintf(buf, len, "%u.%u", hw->api_maj_ver, hw->api_min_ver);
+
+	return 0;
+}
+
+static int ice_info_fw_build(struct ice_pf *pf, char *buf, size_t len)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	snprintf(buf, len, "0x%08x", hw->fw_build);
+
+	return 0;
+}
+
+static int ice_info_orom_ver(struct ice_pf *pf, char *buf, size_t len)
+{
+	struct ice_orom_info *orom = &pf->hw.nvm.orom;
+
+	snprintf(buf, len, "%u.%u.%u", orom->major, orom->build, orom->patch);
+
+	return 0;
+}
+
+static int ice_info_nvm_ver(struct ice_pf *pf, char *buf, size_t len)
+{
+	struct ice_nvm_info *nvm = &pf->hw.nvm;
+
+	snprintf(buf, len, "%x.%02x", nvm->major_ver, nvm->minor_ver);
+
+	return 0;
+}
+
+static int ice_info_eetrack(struct ice_pf *pf, char *buf, size_t len)
+{
+	struct ice_nvm_info *nvm = &pf->hw.nvm;
+
+	snprintf(buf, len, "0x%08x", nvm->eetrack);
+
+	return 0;
+}
+
+static int ice_info_ddp_pkg_name(struct ice_pf *pf, char *buf, size_t len)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	snprintf(buf, len, "%s", hw->active_pkg_name);
+
+	return 0;
+}
+
+static int ice_info_ddp_pkg_version(struct ice_pf *pf, char *buf, size_t len)
+{
+	struct ice_pkg_ver *pkg = &pf->hw.active_pkg_ver;
+
+	snprintf(buf, len, "%u.%u.%u.%u", pkg->major, pkg->minor, pkg->update,
+		 pkg->draft);
+
+	return 0;
+}
+
+static int ice_info_ddp_pkg_bundle_id(struct ice_pf *pf, char *buf, size_t len)
+{
+	snprintf(buf, len, "0x%08x", pf->hw.active_track_id);
+
+	return 0;
+}
+
+static int ice_info_netlist_ver(struct ice_pf *pf, char *buf, size_t len)
+{
+	struct ice_netlist_ver_info *netlist = &pf->hw.netlist_ver;
+
+	/* The netlist version fields are BCD formatted */
+	snprintf(buf, len, "%x.%x.%x-%x.%x.%x", netlist->major, netlist->minor,
+		 netlist->type >> 16, netlist->type & 0xFFFF, netlist->rev,
+		 netlist->cust_ver);
+
+	return 0;
+}
+
+static int ice_info_netlist_build(struct ice_pf *pf, char *buf, size_t len)
+{
+	struct ice_netlist_ver_info *netlist = &pf->hw.netlist_ver;
+
+	snprintf(buf, len, "0x%08x", netlist->hash);
+
+	return 0;
+}
+
+#define fixed(key, getter) { ICE_VERSION_FIXED, key, getter }
+#define running(key, getter) { ICE_VERSION_RUNNING, key, getter }
+
+enum ice_version_type {
+	ICE_VERSION_FIXED,
+	ICE_VERSION_RUNNING,
+	ICE_VERSION_STORED,
+};
+
+static const struct ice_devlink_version {
+	enum ice_version_type type;
+	const char *key;
+	int (*getter)(struct ice_pf *pf, char *buf, size_t len);
+} ice_devlink_versions[] = {
+	fixed(DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, ice_info_pba),
+	running(DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, ice_info_fw_mgmt),
+	running("fw.mgmt.api", ice_info_fw_api),
+	running("fw.mgmt.build", ice_info_fw_build),
+	running(DEVLINK_INFO_VERSION_GENERIC_FW_UNDI, ice_info_orom_ver),
+	running("fw.psid.api", ice_info_nvm_ver),
+	running(DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, ice_info_eetrack),
+	running("fw.app.name", ice_info_ddp_pkg_name),
+	running(DEVLINK_INFO_VERSION_GENERIC_FW_APP, ice_info_ddp_pkg_version),
+	running("fw.app.bundle_id", ice_info_ddp_pkg_bundle_id),
+	running("fw.netlist", ice_info_netlist_ver),
+	running("fw.netlist.build", ice_info_netlist_build),
+};
+
+/**
+ * ice_devlink_info_get - .info_get devlink handler
+ * @devlink: devlink instance structure
+ * @req: the devlink info request
+ * @extack: extended netdev ack structure
+ *
+ * Callback for the devlink .info_get operation. Reports information about the
+ * device.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+static int ice_devlink_info_get(struct devlink *devlink,
+				struct devlink_info_req *req,
+				struct netlink_ext_ack *extack)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+	char buf[100];
+	size_t i;
+	int err;
+
+	err = devlink_info_driver_name_put(req, KBUILD_MODNAME);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Unable to set driver name");
+		return err;
+	}
+
+	ice_info_get_dsn(pf, buf, sizeof(buf));
+
+	err = devlink_info_serial_number_put(req, buf);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Unable to set serial number");
+		return err;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(ice_devlink_versions); i++) {
+		enum ice_version_type type = ice_devlink_versions[i].type;
+		const char *key = ice_devlink_versions[i].key;
+
+		err = ice_devlink_versions[i].getter(pf, buf, sizeof(buf));
+		if (err) {
+			NL_SET_ERR_MSG_MOD(extack, "Unable to obtain version info");
+			return err;
+		}
+
+		switch (type) {
+		case ICE_VERSION_FIXED:
+			err = devlink_info_version_fixed_put(req, key, buf);
+			if (err) {
+				NL_SET_ERR_MSG_MOD(extack, "Unable to set fixed version");
+				return err;
+			}
+			break;
+		case ICE_VERSION_RUNNING:
+			err = devlink_info_version_running_put(req, key, buf);
+			if (err) {
+				NL_SET_ERR_MSG_MOD(extack, "Unable to set running version");
+				return err;
+			}
+			break;
+		case ICE_VERSION_STORED:
+			err = devlink_info_version_stored_put(req, key, buf);
+			if (err) {
+				NL_SET_ERR_MSG_MOD(extack, "Unable to set stored version");
+				return err;
+			}
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_devlink_flash_update - Update firmware stored in flash on the device
+ * @devlink: pointer to devlink associated with device to update
+ * @params: flash update parameters
+ * @extack: netlink extended ACK structure
+ *
+ * Perform a device flash update. The bulk of the update logic is contained
+ * within the ice_flash_pldm_image function.
+ *
+ * Returns: zero on success, or an error code on failure.
+ */
+static int
+ice_devlink_flash_update(struct devlink *devlink,
+			 struct devlink_flash_update_params *params,
+			 struct netlink_ext_ack *extack)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+	struct device *dev = &pf->pdev->dev;
+	struct ice_hw *hw = &pf->hw;
+	const struct firmware *fw;
+	u8 preservation;
+	int err;
+
+	if (!params->overwrite_mask) {
+		/* preserve all settings and identifiers */
+		preservation = ICE_AQC_NVM_PRESERVE_ALL;
+	} else if (params->overwrite_mask == DEVLINK_FLASH_OVERWRITE_SETTINGS) {
+		/* overwrite settings, but preserve the vital device identifiers */
+		preservation = ICE_AQC_NVM_PRESERVE_SELECTED;
+	} else if (params->overwrite_mask == (DEVLINK_FLASH_OVERWRITE_SETTINGS |
+					      DEVLINK_FLASH_OVERWRITE_IDENTIFIERS)) {
+		/* overwrite both settings and identifiers, preserve nothing */
+		preservation = ICE_AQC_NVM_NO_PRESERVATION;
+	} else {
+		NL_SET_ERR_MSG_MOD(extack, "Requested overwrite mask is not supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (!hw->dev_caps.common_cap.nvm_unified_update) {
+		NL_SET_ERR_MSG_MOD(extack, "Current firmware does not support unified update");
+		return -EOPNOTSUPP;
+	}
+
+	err = ice_check_for_pending_update(pf, NULL, extack);
+	if (err)
+		return err;
+
+	err = request_firmware(&fw, params->file_name, dev);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Unable to read file from disk");
+		return err;
+	}
+
+	dev_dbg(dev, "Beginning flash update with file '%s'\n", params->file_name);
+
+	devlink_flash_update_begin_notify(devlink);
+	devlink_flash_update_status_notify(devlink, "Preparing to flash", NULL, 0, 0);
+	err = ice_flash_pldm_image(pf, fw, preservation, extack);
+	devlink_flash_update_end_notify(devlink);
+
+	release_firmware(fw);
+
+	return err;
+}
+
+static const struct devlink_ops ice_devlink_ops = {
+	.supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK,
+	.info_get = ice_devlink_info_get,
+	.flash_update = ice_devlink_flash_update,
+};
+
+static void ice_devlink_free(void *devlink_ptr)
+{
+	devlink_free((struct devlink *)devlink_ptr);
+}
+
+/**
+ * ice_allocate_pf - Allocate devlink and return PF structure pointer
+ * @dev: the device to allocate for
+ *
+ * Allocate a devlink instance for this device and return the private area as
+ * the PF structure. The devlink memory is kept track of through devres by
+ * adding an action to remove it when unwinding.
+ */
+struct ice_pf *ice_allocate_pf(struct device *dev)
+{
+	struct devlink *devlink;
+
+	devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf));
+	if (!devlink)
+		return NULL;
+
+	/* Add an action to teardown the devlink when unwinding the driver */
+	if (devm_add_action(dev, ice_devlink_free, devlink)) {
+		devlink_free(devlink);
+		return NULL;
+	}
+
+	return devlink_priv(devlink);
+}
+
+/**
+ * ice_devlink_register - Register devlink interface for this PF
+ * @pf: the PF to register the devlink for.
+ *
+ * Register the devlink instance associated with this physical function.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+int ice_devlink_register(struct ice_pf *pf)
+{
+	struct devlink *devlink = priv_to_devlink(pf);
+	struct device *dev = ice_pf_to_dev(pf);
+	int err;
+
+	err = devlink_register(devlink, dev);
+	if (err) {
+		dev_err(dev, "devlink registration failed: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_devlink_unregister - Unregister devlink resources for this PF.
+ * @pf: the PF structure to cleanup
+ *
+ * Releases resources used by devlink and cleans up associated memory.
+ */
+void ice_devlink_unregister(struct ice_pf *pf)
+{
+	devlink_unregister(priv_to_devlink(pf));
+}
+
+/**
+ * ice_devlink_create_port - Create a devlink port for this VSI
+ * @vsi: the VSI to create a port for
+ *
+ * Create and register a devlink_port for this VSI.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+int ice_devlink_create_port(struct ice_vsi *vsi)
+{
+	struct devlink_port_attrs attrs = {};
+	struct ice_port_info *pi;
+	struct devlink *devlink;
+	struct device *dev;
+	struct ice_pf *pf;
+	int err;
+
+	/* Currently we only create devlink_port instances for PF VSIs */
+	if (vsi->type != ICE_VSI_PF)
+		return -EINVAL;
+
+	pf = vsi->back;
+	devlink = priv_to_devlink(pf);
+	dev = ice_pf_to_dev(pf);
+	pi = pf->hw.port_info;
+
+	attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+	attrs.phys.port_number = pi->lport;
+	devlink_port_attrs_set(&vsi->devlink_port, &attrs);
+	err = devlink_port_register(devlink, &vsi->devlink_port, vsi->idx);
+	if (err) {
+		dev_err(dev, "devlink_port_register failed: %d\n", err);
+		return err;
+	}
+
+	vsi->devlink_port_registered = true;
+
+	return 0;
+}
+
+/**
+ * ice_devlink_destroy_port - Destroy the devlink_port for this VSI
+ * @vsi: the VSI to cleanup
+ *
+ * Unregisters the devlink_port structure associated with this VSI.
+ */
+void ice_devlink_destroy_port(struct ice_vsi *vsi)
+{
+	if (!vsi->devlink_port_registered)
+		return;
+
+	devlink_port_type_clear(&vsi->devlink_port);
+	devlink_port_unregister(&vsi->devlink_port);
+
+	vsi->devlink_port_registered = false;
+}
+
+/**
+ * ice_devlink_nvm_snapshot - Capture a snapshot of the Shadow RAM contents
+ * @devlink: the devlink instance
+ * @ops: the devlink region being snapshotted
+ * @extack: extended ACK response structure
+ * @data: on exit points to snapshot data buffer
+ *
+ * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for
+ * the shadow-ram devlink region. It captures a snapshot of the shadow ram
+ * contents. This snapshot can later be viewed via the devlink-region
+ * interface.
+ *
+ * @returns zero on success, and updates the data pointer. Returns a non-zero
+ * error code on failure.
+ */
+static int ice_devlink_nvm_snapshot(struct devlink *devlink,
+				    const struct devlink_region_ops *ops,
+				    struct netlink_ext_ack *extack, u8 **data)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	void *nvm_data;
+	u32 nvm_size;
+
+	nvm_size = hw->nvm.flash_size;
+	nvm_data = vzalloc(nvm_size);
+	if (!nvm_data)
+		return -ENOMEM;
+
+	status = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (status) {
+		dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n",
+			status, hw->adminq.sq_last_status);
+		NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore");
+		vfree(nvm_data);
+		return -EIO;
+	}
+
+	status = ice_read_flat_nvm(hw, 0, &nvm_size, nvm_data, false);
+	if (status) {
+		dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n",
+			nvm_size, status, hw->adminq.sq_last_status);
+		NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents");
+		ice_release_nvm(hw);
+		vfree(nvm_data);
+		return -EIO;
+	}
+
+	ice_release_nvm(hw);
+
+	*data = nvm_data;
+
+	return 0;
+}
+
+/**
+ * ice_devlink_devcaps_snapshot - Capture snapshot of device capabilities
+ * @devlink: the devlink instance
+ * @ops: the devlink region being snapshotted
+ * @extack: extended ACK response structure
+ * @data: on exit points to snapshot data buffer
+ *
+ * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for
+ * the device-caps devlink region. It captures a snapshot of the device
+ * capabilities reported by firmware.
+ *
+ * @returns zero on success, and updates the data pointer. Returns a non-zero
+ * error code on failure.
+ */
+static int
+ice_devlink_devcaps_snapshot(struct devlink *devlink,
+			     const struct devlink_region_ops *ops,
+			     struct netlink_ext_ack *extack, u8 **data)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	void *devcaps;
+
+	devcaps = vzalloc(ICE_AQ_MAX_BUF_LEN);
+	if (!devcaps)
+		return -ENOMEM;
+
+	status = ice_aq_list_caps(hw, devcaps, ICE_AQ_MAX_BUF_LEN, NULL,
+				  ice_aqc_opc_list_dev_caps, NULL);
+	if (status) {
+		dev_dbg(dev, "ice_aq_list_caps: failed to read device capabilities, err %d aq_err %d\n",
+			status, hw->adminq.sq_last_status);
+		NL_SET_ERR_MSG_MOD(extack, "Failed to read device capabilities");
+		vfree(devcaps);
+		return -EIO;
+	}
+
+	*data = (u8 *)devcaps;
+
+	return 0;
+}
+
+static const struct devlink_region_ops ice_nvm_region_ops = {
+	.name = "nvm-flash",
+	.destructor = vfree,
+	.snapshot = ice_devlink_nvm_snapshot,
+};
+
+static const struct devlink_region_ops ice_devcaps_region_ops = {
+	.name = "device-caps",
+	.destructor = vfree,
+	.snapshot = ice_devlink_devcaps_snapshot,
+};
+
+/**
+ * ice_devlink_init_regions - Initialize devlink regions
+ * @pf: the PF device structure
+ *
+ * Create devlink regions used to enable access to dump the contents of the
+ * flash memory on the device.
+ */
+void ice_devlink_init_regions(struct ice_pf *pf)
+{
+	struct devlink *devlink = priv_to_devlink(pf);
+	struct device *dev = ice_pf_to_dev(pf);
+	u64 nvm_size;
+
+	nvm_size = pf->hw.nvm.flash_size;
+	pf->nvm_region = devlink_region_create(devlink, &ice_nvm_region_ops, 1,
+					       nvm_size);
+	if (IS_ERR(pf->nvm_region)) {
+		dev_err(dev, "failed to create NVM devlink region, err %ld\n",
+			PTR_ERR(pf->nvm_region));
+		pf->nvm_region = NULL;
+	}
+
+	pf->devcaps_region = devlink_region_create(devlink,
+						   &ice_devcaps_region_ops, 10,
+						   ICE_AQ_MAX_BUF_LEN);
+	if (IS_ERR(pf->devcaps_region)) {
+		dev_err(dev, "failed to create device-caps devlink region, err %ld\n",
+			PTR_ERR(pf->devcaps_region));
+		pf->devcaps_region = NULL;
+	}
+}
+
+/**
+ * ice_devlink_destroy_regions - Destroy devlink regions
+ * @pf: the PF device structure
+ *
+ * Remove previously created regions for this PF.
+ */
+void ice_devlink_destroy_regions(struct ice_pf *pf)
+{
+	if (pf->nvm_region)
+		devlink_region_destroy(pf->nvm_region);
+	if (pf->devcaps_region)
+		devlink_region_destroy(pf->devcaps_region);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.h b/drivers/net/ethernet/intel/ice/ice_devlink.h
new file mode 100644
index 000000000..e07e74426
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DEVLINK_H_
+#define _ICE_DEVLINK_H_
+
+struct ice_pf *ice_allocate_pf(struct device *dev);
+
+int ice_devlink_register(struct ice_pf *pf);
+void ice_devlink_unregister(struct ice_pf *pf);
+int ice_devlink_create_port(struct ice_vsi *vsi);
+void ice_devlink_destroy_port(struct ice_vsi *vsi);
+
+void ice_devlink_init_regions(struct ice_pf *pf);
+void ice_devlink_destroy_regions(struct ice_pf *pf);
+
+#endif /* _ICE_DEVLINK_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
new file mode 100644
index 000000000..7f1bf7184
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -0,0 +1,4061 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* ethtool support for ice */
+
+#include "ice.h"
+#include "ice_flow.h"
+#include "ice_fltr.h"
+#include "ice_lib.h"
+#include "ice_dcb_lib.h"
+#include <net/dcbnl.h>
+
+struct ice_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+#define ICE_STAT(_type, _name, _stat) { \
+	.stat_string = _name, \
+	.sizeof_stat = sizeof_field(_type, _stat), \
+	.stat_offset = offsetof(_type, _stat) \
+}
+
+#define ICE_VSI_STAT(_name, _stat) \
+		ICE_STAT(struct ice_vsi, _name, _stat)
+#define ICE_PF_STAT(_name, _stat) \
+		ICE_STAT(struct ice_pf, _name, _stat)
+
+static int ice_q_stats_len(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+
+	return ((np->vsi->alloc_txq + np->vsi->alloc_rxq) *
+		(sizeof(struct ice_q_stats) / sizeof(u64)));
+}
+
+#define ICE_PF_STATS_LEN	ARRAY_SIZE(ice_gstrings_pf_stats)
+#define ICE_VSI_STATS_LEN	ARRAY_SIZE(ice_gstrings_vsi_stats)
+
+#define ICE_PFC_STATS_LEN ( \
+		(sizeof_field(struct ice_pf, stats.priority_xoff_rx) + \
+		 sizeof_field(struct ice_pf, stats.priority_xon_rx) + \
+		 sizeof_field(struct ice_pf, stats.priority_xoff_tx) + \
+		 sizeof_field(struct ice_pf, stats.priority_xon_tx)) \
+		 / sizeof(u64))
+#define ICE_ALL_STATS_LEN(n)	(ICE_PF_STATS_LEN + ICE_PFC_STATS_LEN + \
+				 ICE_VSI_STATS_LEN + ice_q_stats_len(n))
+
+static const struct ice_stats ice_gstrings_vsi_stats[] = {
+	ICE_VSI_STAT("rx_unicast", eth_stats.rx_unicast),
+	ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast),
+	ICE_VSI_STAT("rx_multicast", eth_stats.rx_multicast),
+	ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast),
+	ICE_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast),
+	ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
+	ICE_VSI_STAT("rx_bytes", eth_stats.rx_bytes),
+	ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes),
+	ICE_VSI_STAT("rx_dropped", eth_stats.rx_discards),
+	ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
+	ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed),
+	ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
+	ICE_VSI_STAT("rx_gro_dropped", rx_gro_dropped),
+	ICE_VSI_STAT("tx_errors", eth_stats.tx_errors),
+	ICE_VSI_STAT("tx_linearize", tx_linearize),
+	ICE_VSI_STAT("tx_busy", tx_busy),
+	ICE_VSI_STAT("tx_restart", tx_restart),
+};
+
+enum ice_ethtool_test_id {
+	ICE_ETH_TEST_REG = 0,
+	ICE_ETH_TEST_EEPROM,
+	ICE_ETH_TEST_INTR,
+	ICE_ETH_TEST_LOOP,
+	ICE_ETH_TEST_LINK,
+};
+
+static const char ice_gstrings_test[][ETH_GSTRING_LEN] = {
+	"Register test  (offline)",
+	"EEPROM test    (offline)",
+	"Interrupt test (offline)",
+	"Loopback test  (offline)",
+	"Link test   (on/offline)",
+};
+
+#define ICE_TEST_LEN (sizeof(ice_gstrings_test) / ETH_GSTRING_LEN)
+
+/* These PF_STATs might look like duplicates of some NETDEV_STATs,
+ * but they aren't. This device is capable of supporting multiple
+ * VSIs/netdevs on a single PF. The NETDEV_STATs are for individual
+ * netdevs whereas the PF_STATs are for the physical function that's
+ * hosting these netdevs.
+ *
+ * The PF_STATs are appended to the netdev stats only when ethtool -S
+ * is queried on the base PF netdev.
+ */
+static const struct ice_stats ice_gstrings_pf_stats[] = {
+	ICE_PF_STAT("rx_bytes.nic", stats.eth.rx_bytes),
+	ICE_PF_STAT("tx_bytes.nic", stats.eth.tx_bytes),
+	ICE_PF_STAT("rx_unicast.nic", stats.eth.rx_unicast),
+	ICE_PF_STAT("tx_unicast.nic", stats.eth.tx_unicast),
+	ICE_PF_STAT("rx_multicast.nic", stats.eth.rx_multicast),
+	ICE_PF_STAT("tx_multicast.nic", stats.eth.tx_multicast),
+	ICE_PF_STAT("rx_broadcast.nic", stats.eth.rx_broadcast),
+	ICE_PF_STAT("tx_broadcast.nic", stats.eth.tx_broadcast),
+	ICE_PF_STAT("tx_errors.nic", stats.eth.tx_errors),
+	ICE_PF_STAT("tx_timeout.nic", tx_timeout_count),
+	ICE_PF_STAT("rx_size_64.nic", stats.rx_size_64),
+	ICE_PF_STAT("tx_size_64.nic", stats.tx_size_64),
+	ICE_PF_STAT("rx_size_127.nic", stats.rx_size_127),
+	ICE_PF_STAT("tx_size_127.nic", stats.tx_size_127),
+	ICE_PF_STAT("rx_size_255.nic", stats.rx_size_255),
+	ICE_PF_STAT("tx_size_255.nic", stats.tx_size_255),
+	ICE_PF_STAT("rx_size_511.nic", stats.rx_size_511),
+	ICE_PF_STAT("tx_size_511.nic", stats.tx_size_511),
+	ICE_PF_STAT("rx_size_1023.nic", stats.rx_size_1023),
+	ICE_PF_STAT("tx_size_1023.nic", stats.tx_size_1023),
+	ICE_PF_STAT("rx_size_1522.nic", stats.rx_size_1522),
+	ICE_PF_STAT("tx_size_1522.nic", stats.tx_size_1522),
+	ICE_PF_STAT("rx_size_big.nic", stats.rx_size_big),
+	ICE_PF_STAT("tx_size_big.nic", stats.tx_size_big),
+	ICE_PF_STAT("link_xon_rx.nic", stats.link_xon_rx),
+	ICE_PF_STAT("link_xon_tx.nic", stats.link_xon_tx),
+	ICE_PF_STAT("link_xoff_rx.nic", stats.link_xoff_rx),
+	ICE_PF_STAT("link_xoff_tx.nic", stats.link_xoff_tx),
+	ICE_PF_STAT("tx_dropped_link_down.nic", stats.tx_dropped_link_down),
+	ICE_PF_STAT("rx_undersize.nic", stats.rx_undersize),
+	ICE_PF_STAT("rx_fragments.nic", stats.rx_fragments),
+	ICE_PF_STAT("rx_oversize.nic", stats.rx_oversize),
+	ICE_PF_STAT("rx_jabber.nic", stats.rx_jabber),
+	ICE_PF_STAT("rx_csum_bad.nic", hw_csum_rx_error),
+	ICE_PF_STAT("rx_length_errors.nic", stats.rx_len_errors),
+	ICE_PF_STAT("rx_dropped.nic", stats.eth.rx_discards),
+	ICE_PF_STAT("rx_crc_errors.nic", stats.crc_errors),
+	ICE_PF_STAT("illegal_bytes.nic", stats.illegal_bytes),
+	ICE_PF_STAT("mac_local_faults.nic", stats.mac_local_faults),
+	ICE_PF_STAT("mac_remote_faults.nic", stats.mac_remote_faults),
+	ICE_PF_STAT("fdir_sb_match.nic", stats.fd_sb_match),
+	ICE_PF_STAT("fdir_sb_status.nic", stats.fd_sb_status),
+};
+
+static const u32 ice_regs_dump_list[] = {
+	PFGEN_STATE,
+	PRTGEN_STATUS,
+	QRX_CTRL(0),
+	QINT_TQCTL(0),
+	QINT_RQCTL(0),
+	PFINT_OICR_ENA,
+	QRX_ITR(0),
+};
+
+struct ice_priv_flag {
+	char name[ETH_GSTRING_LEN];
+	u32 bitno;			/* bit position in pf->flags */
+};
+
+#define ICE_PRIV_FLAG(_name, _bitno) { \
+	.name = _name, \
+	.bitno = _bitno, \
+}
+
+static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
+	ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA),
+	ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT),
+	ICE_PRIV_FLAG("vf-true-promisc-support",
+		      ICE_FLAG_VF_TRUE_PROMISC_ENA),
+	ICE_PRIV_FLAG("mdd-auto-reset-vf", ICE_FLAG_MDD_AUTO_RESET_VF),
+	ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX),
+};
+
+#define ICE_PRIV_FLAG_ARRAY_SIZE	ARRAY_SIZE(ice_gstrings_priv_flags)
+
+static void
+ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_orom_info *orom;
+	struct ice_nvm_info *nvm;
+
+	nvm = &hw->nvm;
+	orom = &nvm->orom;
+
+	strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
+
+	/* Display NVM version (from which the firmware version can be
+	 * determined) which contains more pertinent information.
+	 */
+	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+		 "%x.%02x 0x%x %d.%d.%d", nvm->major_ver, nvm->minor_ver,
+		 nvm->eetrack, orom->major, orom->build, orom->patch);
+
+	strscpy(drvinfo->bus_info, pci_name(pf->pdev),
+		sizeof(drvinfo->bus_info));
+	drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE;
+}
+
+static int ice_get_regs_len(struct net_device __always_unused *netdev)
+{
+	return sizeof(ice_regs_dump_list);
+}
+
+static void
+ice_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 *regs_buf = (u32 *)p;
+	unsigned int i;
+
+	regs->version = 1;
+
+	for (i = 0; i < ARRAY_SIZE(ice_regs_dump_list); ++i)
+		regs_buf[i] = rd32(hw, ice_regs_dump_list[i]);
+}
+
+static u32 ice_get_msglevel(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+#ifndef CONFIG_DYNAMIC_DEBUG
+	if (pf->hw.debug_mask)
+		netdev_info(netdev, "hw debug_mask: 0x%llX\n",
+			    pf->hw.debug_mask);
+#endif /* !CONFIG_DYNAMIC_DEBUG */
+
+	return pf->msg_enable;
+}
+
+static void ice_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+#ifndef CONFIG_DYNAMIC_DEBUG
+	if (ICE_DBG_USER & data)
+		pf->hw.debug_mask = data;
+	else
+		pf->msg_enable = data;
+#else
+	pf->msg_enable = data;
+#endif /* !CONFIG_DYNAMIC_DEBUG */
+}
+
+static int ice_get_eeprom_len(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	return (int)pf->hw.nvm.flash_size;
+}
+
+static int
+ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom,
+	       u8 *bytes)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	struct device *dev;
+	int ret = 0;
+	u8 *buf;
+
+	dev = ice_pf_to_dev(pf);
+
+	eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+	netdev_dbg(netdev, "GEEPROM cmd 0x%08x, offset 0x%08x, len 0x%08x\n",
+		   eeprom->cmd, eeprom->offset, eeprom->len);
+
+	buf = kzalloc(eeprom->len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	status = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (status) {
+		dev_err(dev, "ice_acquire_nvm failed, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		ret = -EIO;
+		goto out;
+	}
+
+	status = ice_read_flat_nvm(hw, eeprom->offset, &eeprom->len, buf,
+				   false);
+	if (status) {
+		dev_err(dev, "ice_read_flat_nvm failed, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		ret = -EIO;
+		goto release;
+	}
+
+	memcpy(bytes, buf, eeprom->len);
+release:
+	ice_release_nvm(hw);
+out:
+	kfree(buf);
+	return ret;
+}
+
+/**
+ * ice_active_vfs - check if there are any active VFs
+ * @pf: board private structure
+ *
+ * Returns true if an active VF is found, otherwise returns false
+ */
+static bool ice_active_vfs(struct ice_pf *pf)
+{
+	unsigned int i;
+
+	ice_for_each_vf(pf, i) {
+		struct ice_vf *vf = &pf->vf[i];
+
+		if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_link_test - perform a link test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_link_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	enum ice_status status;
+	bool link_up = false;
+
+	netdev_info(netdev, "link test\n");
+	status = ice_get_link_status(np->vsi->port_info, &link_up);
+	if (status) {
+		netdev_err(netdev, "link query error, status = %s\n",
+			   ice_stat_str(status));
+		return 1;
+	}
+
+	if (!link_up)
+		return 2;
+
+	return 0;
+}
+
+/**
+ * ice_eeprom_test - perform an EEPROM test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_eeprom_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	netdev_info(netdev, "EEPROM test\n");
+	return !!(ice_nvm_validate_checksum(&pf->hw));
+}
+
+/**
+ * ice_reg_pattern_test
+ * @hw: pointer to the HW struct
+ * @reg: reg to be tested
+ * @mask: bits to be touched
+ */
+static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
+{
+	struct ice_pf *pf = (struct ice_pf *)hw->back;
+	struct device *dev = ice_pf_to_dev(pf);
+	static const u32 patterns[] = {
+		0x5A5A5A5A, 0xA5A5A5A5,
+		0x00000000, 0xFFFFFFFF
+	};
+	u32 val, orig_val;
+	unsigned int i;
+
+	orig_val = rd32(hw, reg);
+	for (i = 0; i < ARRAY_SIZE(patterns); ++i) {
+		u32 pattern = patterns[i] & mask;
+
+		wr32(hw, reg, pattern);
+		val = rd32(hw, reg);
+		if (val == pattern)
+			continue;
+		dev_err(dev, "%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n"
+			, __func__, reg, pattern, val);
+		return 1;
+	}
+
+	wr32(hw, reg, orig_val);
+	val = rd32(hw, reg);
+	if (val != orig_val) {
+		dev_err(dev, "%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n"
+			, __func__, reg, orig_val, val);
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_reg_test - perform a register test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_reg_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_hw *hw = np->vsi->port_info->hw;
+	u32 int_elements = hw->func_caps.common_cap.num_msix_vectors ?
+		hw->func_caps.common_cap.num_msix_vectors - 1 : 1;
+	struct ice_diag_reg_test_info {
+		u32 address;
+		u32 mask;
+		u32 elem_num;
+		u32 elem_size;
+	} ice_reg_list[] = {
+		{GLINT_ITR(0, 0), 0x00000fff, int_elements,
+			GLINT_ITR(0, 1) - GLINT_ITR(0, 0)},
+		{GLINT_ITR(1, 0), 0x00000fff, int_elements,
+			GLINT_ITR(1, 1) - GLINT_ITR(1, 0)},
+		{GLINT_ITR(0, 0), 0x00000fff, int_elements,
+			GLINT_ITR(2, 1) - GLINT_ITR(2, 0)},
+		{GLINT_CTL, 0xffff0001, 1, 0}
+	};
+	unsigned int i;
+
+	netdev_dbg(netdev, "Register test\n");
+	for (i = 0; i < ARRAY_SIZE(ice_reg_list); ++i) {
+		u32 j;
+
+		for (j = 0; j < ice_reg_list[i].elem_num; ++j) {
+			u32 mask = ice_reg_list[i].mask;
+			u32 reg = ice_reg_list[i].address +
+				(j * ice_reg_list[i].elem_size);
+
+			/* bail on failure (non-zero return) */
+			if (ice_reg_pattern_test(hw, reg, mask))
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_lbtest_prepare_rings - configure Tx/Rx test rings
+ * @vsi: pointer to the VSI structure
+ *
+ * Function configures rings of a VSI for loopback test without
+ * enabling interrupts or informing the kernel about new queues.
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_lbtest_prepare_rings(struct ice_vsi *vsi)
+{
+	int status;
+
+	status = ice_vsi_setup_tx_rings(vsi);
+	if (status)
+		goto err_setup_tx_ring;
+
+	status = ice_vsi_setup_rx_rings(vsi);
+	if (status)
+		goto err_setup_rx_ring;
+
+	status = ice_vsi_cfg(vsi);
+	if (status)
+		goto err_setup_rx_ring;
+
+	status = ice_vsi_start_all_rx_rings(vsi);
+	if (status)
+		goto err_start_rx_ring;
+
+	return status;
+
+err_start_rx_ring:
+	ice_vsi_free_rx_rings(vsi);
+err_setup_rx_ring:
+	ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
+err_setup_tx_ring:
+	ice_vsi_free_tx_rings(vsi);
+
+	return status;
+}
+
+/**
+ * ice_lbtest_disable_rings - disable Tx/Rx test rings after loopback test
+ * @vsi: pointer to the VSI structure
+ *
+ * Function stops and frees VSI rings after a loopback test.
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_lbtest_disable_rings(struct ice_vsi *vsi)
+{
+	int status;
+
+	status = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
+	if (status)
+		netdev_err(vsi->netdev, "Failed to stop Tx rings, VSI %d error %d\n",
+			   vsi->vsi_num, status);
+
+	status = ice_vsi_stop_all_rx_rings(vsi);
+	if (status)
+		netdev_err(vsi->netdev, "Failed to stop Rx rings, VSI %d error %d\n",
+			   vsi->vsi_num, status);
+
+	ice_vsi_free_tx_rings(vsi);
+	ice_vsi_free_rx_rings(vsi);
+
+	return status;
+}
+
+/**
+ * ice_lbtest_create_frame - create test packet
+ * @pf: pointer to the PF structure
+ * @ret_data: allocated frame buffer
+ * @size: size of the packet data
+ *
+ * Function allocates a frame with a test pattern on specific offsets.
+ * Returns 0 on success, non-zero on failure.
+ */
+static int ice_lbtest_create_frame(struct ice_pf *pf, u8 **ret_data, u16 size)
+{
+	u8 *data;
+
+	if (!pf)
+		return -EINVAL;
+
+	data = devm_kzalloc(ice_pf_to_dev(pf), size, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* Since the ethernet test frame should always be at least
+	 * 64 bytes long, fill some octets in the payload with test data.
+	 */
+	memset(data, 0xFF, size);
+	data[32] = 0xDE;
+	data[42] = 0xAD;
+	data[44] = 0xBE;
+	data[46] = 0xEF;
+
+	*ret_data = data;
+
+	return 0;
+}
+
+/**
+ * ice_lbtest_check_frame - verify received loopback frame
+ * @frame: pointer to the raw packet data
+ *
+ * Function verifies received test frame with a pattern.
+ * Returns true if frame matches the pattern, false otherwise.
+ */
+static bool ice_lbtest_check_frame(u8 *frame)
+{
+	/* Validate bytes of a frame under offsets chosen earlier */
+	if (frame[32] == 0xDE &&
+	    frame[42] == 0xAD &&
+	    frame[44] == 0xBE &&
+	    frame[46] == 0xEF &&
+	    frame[48] == 0xFF)
+		return true;
+
+	return false;
+}
+
+/**
+ * ice_diag_send - send test frames to the test ring
+ * @tx_ring: pointer to the transmit ring
+ * @data: pointer to the raw packet data
+ * @size: size of the packet to send
+ *
+ * Function sends loopback packets on a test Tx ring.
+ */
+static int ice_diag_send(struct ice_ring *tx_ring, u8 *data, u16 size)
+{
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+	dma_addr_t dma;
+	u64 td_cmd;
+
+	tx_desc = ICE_TX_DESC(tx_ring, tx_ring->next_to_use);
+	tx_buf = &tx_ring->tx_buf[tx_ring->next_to_use];
+
+	dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(tx_ring->dev, dma))
+		return -EINVAL;
+
+	tx_desc->buf_addr = cpu_to_le64(dma);
+
+	/* These flags are required for a descriptor to be pushed out */
+	td_cmd = (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
+	tx_desc->cmd_type_offset_bsz =
+		cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+			    (td_cmd << ICE_TXD_QW1_CMD_S) |
+			    ((u64)0 << ICE_TXD_QW1_OFFSET_S) |
+			    ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+			    ((u64)0 << ICE_TXD_QW1_L2TAG1_S));
+
+	tx_buf->next_to_watch = tx_desc;
+
+	/* Force memory write to complete before letting h/w know
+	 * there are new descriptors to fetch.
+	 */
+	wmb();
+
+	tx_ring->next_to_use++;
+	if (tx_ring->next_to_use >= tx_ring->count)
+		tx_ring->next_to_use = 0;
+
+	writel_relaxed(tx_ring->next_to_use, tx_ring->tail);
+
+	/* Wait until the packets get transmitted to the receive queue. */
+	usleep_range(1000, 2000);
+	dma_unmap_single(tx_ring->dev, dma, size, DMA_TO_DEVICE);
+
+	return 0;
+}
+
+#define ICE_LB_FRAME_SIZE 64
+/**
+ * ice_lbtest_receive_frames - receive and verify test frames
+ * @rx_ring: pointer to the receive ring
+ *
+ * Function receives loopback packets and verify their correctness.
+ * Returns number of received valid frames.
+ */
+static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
+{
+	struct ice_rx_buf *rx_buf;
+	int valid_frames, i;
+	u8 *received_buf;
+
+	valid_frames = 0;
+
+	for (i = 0; i < rx_ring->count; i++) {
+		union ice_32b_rx_flex_desc *rx_desc;
+
+		rx_desc = ICE_RX_DESC(rx_ring, i);
+
+		if (!(rx_desc->wb.status_error0 &
+		    (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) |
+		     cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)))))
+			continue;
+
+		rx_buf = &rx_ring->rx_buf[i];
+		received_buf = page_address(rx_buf->page) + rx_buf->page_offset;
+
+		if (ice_lbtest_check_frame(received_buf))
+			valid_frames++;
+	}
+
+	return valid_frames;
+}
+
+/**
+ * ice_loopback_test - perform a loopback test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_loopback_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *orig_vsi = np->vsi, *test_vsi;
+	struct ice_pf *pf = orig_vsi->back;
+	struct ice_ring *tx_ring, *rx_ring;
+	u8 broadcast[ETH_ALEN], ret = 0;
+	int num_frames, valid_frames;
+	struct device *dev;
+	u8 *tx_frame;
+	int i;
+
+	dev = ice_pf_to_dev(pf);
+	netdev_info(netdev, "loopback test\n");
+
+	test_vsi = ice_lb_vsi_setup(pf, pf->hw.port_info);
+	if (!test_vsi) {
+		netdev_err(netdev, "Failed to create a VSI for the loopback test\n");
+		return 1;
+	}
+
+	test_vsi->netdev = netdev;
+	tx_ring = test_vsi->tx_rings[0];
+	rx_ring = test_vsi->rx_rings[0];
+
+	if (ice_lbtest_prepare_rings(test_vsi)) {
+		ret = 2;
+		goto lbtest_vsi_close;
+	}
+
+	if (ice_alloc_rx_bufs(rx_ring, rx_ring->count)) {
+		ret = 3;
+		goto lbtest_rings_dis;
+	}
+
+	/* Enable MAC loopback in firmware */
+	if (ice_aq_set_mac_loopback(&pf->hw, true, NULL)) {
+		ret = 4;
+		goto lbtest_mac_dis;
+	}
+
+	/* Test VSI needs to receive broadcast packets */
+	eth_broadcast_addr(broadcast);
+	if (ice_fltr_add_mac(test_vsi, broadcast, ICE_FWD_TO_VSI)) {
+		ret = 5;
+		goto lbtest_mac_dis;
+	}
+
+	if (ice_lbtest_create_frame(pf, &tx_frame, ICE_LB_FRAME_SIZE)) {
+		ret = 7;
+		goto remove_mac_filters;
+	}
+
+	num_frames = min_t(int, tx_ring->count, 32);
+	for (i = 0; i < num_frames; i++) {
+		if (ice_diag_send(tx_ring, tx_frame, ICE_LB_FRAME_SIZE)) {
+			ret = 8;
+			goto lbtest_free_frame;
+		}
+	}
+
+	valid_frames = ice_lbtest_receive_frames(rx_ring);
+	if (!valid_frames)
+		ret = 9;
+	else if (valid_frames != num_frames)
+		ret = 10;
+
+lbtest_free_frame:
+	devm_kfree(dev, tx_frame);
+remove_mac_filters:
+	if (ice_fltr_remove_mac(test_vsi, broadcast, ICE_FWD_TO_VSI))
+		netdev_err(netdev, "Could not remove MAC filter for the test VSI\n");
+lbtest_mac_dis:
+	/* Disable MAC loopback after the test is completed. */
+	if (ice_aq_set_mac_loopback(&pf->hw, false, NULL))
+		netdev_err(netdev, "Could not disable MAC loopback\n");
+lbtest_rings_dis:
+	if (ice_lbtest_disable_rings(test_vsi))
+		netdev_err(netdev, "Could not disable test rings\n");
+lbtest_vsi_close:
+	test_vsi->netdev = NULL;
+	if (ice_vsi_release(test_vsi))
+		netdev_err(netdev, "Failed to remove the test VSI\n");
+
+	return ret;
+}
+
+/**
+ * ice_intr_test - perform an interrupt test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_intr_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+	u16 swic_old = pf->sw_int_count;
+
+	netdev_info(netdev, "interrupt test\n");
+
+	wr32(&pf->hw, GLINT_DYN_CTL(pf->oicr_idx),
+	     GLINT_DYN_CTL_SW_ITR_INDX_M |
+	     GLINT_DYN_CTL_INTENA_MSK_M |
+	     GLINT_DYN_CTL_SWINT_TRIG_M);
+
+	usleep_range(1000, 2000);
+	return (swic_old == pf->sw_int_count);
+}
+
+/**
+ * ice_self_test - handler function for performing a self-test by ethtool
+ * @netdev: network interface device structure
+ * @eth_test: ethtool_test structure
+ * @data: required by ethtool.self_test
+ *
+ * This function is called after invoking 'ethtool -t devname' command where
+ * devname is the name of the network device on which ethtool should operate.
+ * It performs a set of self-tests to check if a device works properly.
+ */
+static void
+ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
+	      u64 *data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	bool if_running = netif_running(netdev);
+	struct ice_pf *pf = np->vsi->back;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+
+	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+		netdev_info(netdev, "offline testing starting\n");
+
+		set_bit(__ICE_TESTING, pf->state);
+
+		if (ice_active_vfs(pf)) {
+			dev_warn(dev, "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
+			data[ICE_ETH_TEST_REG] = 1;
+			data[ICE_ETH_TEST_EEPROM] = 1;
+			data[ICE_ETH_TEST_INTR] = 1;
+			data[ICE_ETH_TEST_LOOP] = 1;
+			data[ICE_ETH_TEST_LINK] = 1;
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+			clear_bit(__ICE_TESTING, pf->state);
+			goto skip_ol_tests;
+		}
+		/* If the device is online then take it offline */
+		if (if_running)
+			/* indicate we're in test mode */
+			ice_stop(netdev);
+
+		data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
+		data[ICE_ETH_TEST_EEPROM] = ice_eeprom_test(netdev);
+		data[ICE_ETH_TEST_INTR] = ice_intr_test(netdev);
+		data[ICE_ETH_TEST_LOOP] = ice_loopback_test(netdev);
+		data[ICE_ETH_TEST_REG] = ice_reg_test(netdev);
+
+		if (data[ICE_ETH_TEST_LINK] ||
+		    data[ICE_ETH_TEST_EEPROM] ||
+		    data[ICE_ETH_TEST_LOOP] ||
+		    data[ICE_ETH_TEST_INTR] ||
+		    data[ICE_ETH_TEST_REG])
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		clear_bit(__ICE_TESTING, pf->state);
+
+		if (if_running) {
+			int status = ice_open(netdev);
+
+			if (status) {
+				dev_err(dev, "Could not open device %s, err %d\n",
+					pf->int_name, status);
+			}
+		}
+	} else {
+		/* Online tests */
+		netdev_info(netdev, "online testing starting\n");
+
+		data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
+		if (data[ICE_ETH_TEST_LINK])
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* Offline only tests, not run in online; pass by default */
+		data[ICE_ETH_TEST_REG] = 0;
+		data[ICE_ETH_TEST_EEPROM] = 0;
+		data[ICE_ETH_TEST_INTR] = 0;
+		data[ICE_ETH_TEST_LOOP] = 0;
+	}
+
+skip_ol_tests:
+	netdev_info(netdev, "testing finished\n");
+}
+
+static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	char *p = (char *)data;
+	unsigned int i;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		for (i = 0; i < ICE_VSI_STATS_LEN; i++) {
+			snprintf(p, ETH_GSTRING_LEN, "%s",
+				 ice_gstrings_vsi_stats[i].stat_string);
+			p += ETH_GSTRING_LEN;
+		}
+
+		ice_for_each_alloc_txq(vsi, i) {
+			snprintf(p, ETH_GSTRING_LEN,
+				 "tx_queue_%u_packets", i);
+			p += ETH_GSTRING_LEN;
+			snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_bytes", i);
+			p += ETH_GSTRING_LEN;
+		}
+
+		ice_for_each_alloc_rxq(vsi, i) {
+			snprintf(p, ETH_GSTRING_LEN,
+				 "rx_queue_%u_packets", i);
+			p += ETH_GSTRING_LEN;
+			snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_bytes", i);
+			p += ETH_GSTRING_LEN;
+		}
+
+		if (vsi->type != ICE_VSI_PF)
+			return;
+
+		for (i = 0; i < ICE_PF_STATS_LEN; i++) {
+			snprintf(p, ETH_GSTRING_LEN, "%s",
+				 ice_gstrings_pf_stats[i].stat_string);
+			p += ETH_GSTRING_LEN;
+		}
+
+		for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
+			snprintf(p, ETH_GSTRING_LEN,
+				 "tx_priority_%u_xon.nic", i);
+			p += ETH_GSTRING_LEN;
+			snprintf(p, ETH_GSTRING_LEN,
+				 "tx_priority_%u_xoff.nic", i);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
+			snprintf(p, ETH_GSTRING_LEN,
+				 "rx_priority_%u_xon.nic", i);
+			p += ETH_GSTRING_LEN;
+			snprintf(p, ETH_GSTRING_LEN,
+				 "rx_priority_%u_xoff.nic", i);
+			p += ETH_GSTRING_LEN;
+		}
+		break;
+	case ETH_SS_TEST:
+		memcpy(data, ice_gstrings_test, ICE_TEST_LEN * ETH_GSTRING_LEN);
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
+			snprintf(p, ETH_GSTRING_LEN, "%s",
+				 ice_gstrings_priv_flags[i].name);
+			p += ETH_GSTRING_LEN;
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+static int
+ice_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	bool led_active;
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		led_active = true;
+		break;
+	case ETHTOOL_ID_INACTIVE:
+		led_active = false;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (ice_aq_set_port_id_led(np->vsi->port_info, !led_active, NULL))
+		return -EIO;
+
+	return 0;
+}
+
+/**
+ * ice_set_fec_cfg - Set link FEC options
+ * @netdev: network interface device structure
+ * @req_fec: FEC mode to configure
+ */
+static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_aqc_set_phy_cfg_data config = { 0 };
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_port_info *pi;
+
+	pi = vsi->port_info;
+	if (!pi)
+		return -EOPNOTSUPP;
+
+	/* Changing the FEC parameters is not supported if not the PF VSI */
+	if (vsi->type != ICE_VSI_PF) {
+		netdev_info(netdev, "Changing FEC parameters only supported for PF VSI\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* Proceed only if requesting different FEC mode */
+	if (pi->phy.curr_user_fec_req == req_fec)
+		return 0;
+
+	/* Copy the current user PHY configuration. The current user PHY
+	 * configuration is initialized during probe from PHY capabilities
+	 * software mode, and updated on set PHY configuration.
+	 */
+	memcpy(&config, &pi->phy.curr_user_phy_cfg, sizeof(config));
+
+	ice_cfg_phy_fec(pi, &config, req_fec);
+	config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+
+	if (ice_aq_set_phy_cfg(pi->hw, pi, &config, NULL))
+		return -EAGAIN;
+
+	/* Save requested FEC config */
+	pi->phy.curr_user_fec_req = req_fec;
+
+	return 0;
+}
+
+/**
+ * ice_set_fecparam - Set FEC link options
+ * @netdev: network interface device structure
+ * @fecparam: Ethtool structure to retrieve FEC parameters
+ */
+static int
+ice_set_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	enum ice_fec_mode fec;
+
+	switch (fecparam->fec) {
+	case ETHTOOL_FEC_AUTO:
+		fec = ICE_FEC_AUTO;
+		break;
+	case ETHTOOL_FEC_RS:
+		fec = ICE_FEC_RS;
+		break;
+	case ETHTOOL_FEC_BASER:
+		fec = ICE_FEC_BASER;
+		break;
+	case ETHTOOL_FEC_OFF:
+	case ETHTOOL_FEC_NONE:
+		fec = ICE_FEC_NONE;
+		break;
+	default:
+		dev_warn(ice_pf_to_dev(vsi->back), "Unsupported FEC mode: %d\n",
+			 fecparam->fec);
+		return -EINVAL;
+	}
+
+	return ice_set_fec_cfg(netdev, fec);
+}
+
+/**
+ * ice_get_fecparam - Get link FEC options
+ * @netdev: network interface device structure
+ * @fecparam: Ethtool structure to retrieve FEC parameters
+ */
+static int
+ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_aqc_get_phy_caps_data *caps;
+	struct ice_link_status *link_info;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_port_info *pi;
+	enum ice_status status;
+	int err = 0;
+
+	pi = vsi->port_info;
+
+	if (!pi)
+		return -EOPNOTSUPP;
+	link_info = &pi->phy.link_info;
+
+	/* Set FEC mode based on negotiated link info */
+	switch (link_info->fec_info) {
+	case ICE_AQ_LINK_25G_KR_FEC_EN:
+		fecparam->active_fec = ETHTOOL_FEC_BASER;
+		break;
+	case ICE_AQ_LINK_25G_RS_528_FEC_EN:
+	case ICE_AQ_LINK_25G_RS_544_FEC_EN:
+		fecparam->active_fec = ETHTOOL_FEC_RS;
+		break;
+	default:
+		fecparam->active_fec = ETHTOOL_FEC_OFF;
+		break;
+	}
+
+	caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		return -ENOMEM;
+
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
+				     caps, NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	/* Set supported/configured FEC modes based on PHY capability */
+	if (caps->caps & ICE_AQC_PHY_EN_AUTO_FEC)
+		fecparam->fec |= ETHTOOL_FEC_AUTO;
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+		fecparam->fec |= ETHTOOL_FEC_BASER;
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN)
+		fecparam->fec |= ETHTOOL_FEC_RS;
+	if (caps->link_fec_options == 0)
+		fecparam->fec |= ETHTOOL_FEC_OFF;
+
+done:
+	kfree(caps);
+	return err;
+}
+
+/**
+ * ice_nway_reset - restart autonegotiation
+ * @netdev: network interface device structure
+ */
+static int ice_nway_reset(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_port_info *pi;
+	enum ice_status status;
+
+	pi = vsi->port_info;
+	/* If VSI state is up, then restart autoneg with link up */
+	if (!test_bit(__ICE_DOWN, vsi->back->state))
+		status = ice_aq_set_link_restart_an(pi, true, NULL);
+	else
+		status = ice_aq_set_link_restart_an(pi, false, NULL);
+
+	if (status) {
+		netdev_info(netdev, "link restart failed, err %s aq_err %s\n",
+			    ice_stat_str(status),
+			    ice_aq_str(pi->hw->adminq.sq_last_status));
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_priv_flags - report device private flags
+ * @netdev: network interface device structure
+ *
+ * The get string set count and the string set should be matched for each
+ * flag returned.  Add new strings for each flag to the ice_gstrings_priv_flags
+ * array.
+ *
+ * Returns a u32 bitmap of flags.
+ */
+static u32 ice_get_priv_flags(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	u32 i, ret_flags = 0;
+
+	for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
+		const struct ice_priv_flag *priv_flag;
+
+		priv_flag = &ice_gstrings_priv_flags[i];
+
+		if (test_bit(priv_flag->bitno, pf->flags))
+			ret_flags |= BIT(i);
+	}
+
+	return ret_flags;
+}
+
+/**
+ * ice_set_priv_flags - set private flags
+ * @netdev: network interface device structure
+ * @flags: bit flags to be set
+ */
+static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	DECLARE_BITMAP(change_flags, ICE_PF_FLAGS_NBITS);
+	DECLARE_BITMAP(orig_flags, ICE_PF_FLAGS_NBITS);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	int ret = 0;
+	u32 i;
+
+	if (flags > BIT(ICE_PRIV_FLAG_ARRAY_SIZE))
+		return -EINVAL;
+
+	dev = ice_pf_to_dev(pf);
+	set_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
+
+	bitmap_copy(orig_flags, pf->flags, ICE_PF_FLAGS_NBITS);
+	for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
+		const struct ice_priv_flag *priv_flag;
+
+		priv_flag = &ice_gstrings_priv_flags[i];
+
+		if (flags & BIT(i))
+			set_bit(priv_flag->bitno, pf->flags);
+		else
+			clear_bit(priv_flag->bitno, pf->flags);
+	}
+
+	bitmap_xor(change_flags, pf->flags, orig_flags, ICE_PF_FLAGS_NBITS);
+
+	/* Do not allow change to link-down-on-close when Total Port Shutdown
+	 * is enabled.
+	 */
+	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, change_flags) &&
+	    test_bit(ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags)) {
+		dev_err(dev, "Setting link-down-on-close not supported on this port\n");
+		set_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags);
+		ret = -EINVAL;
+		goto ethtool_exit;
+	}
+
+	if (test_bit(ICE_FLAG_FW_LLDP_AGENT, change_flags)) {
+		if (!test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) {
+			enum ice_status status;
+
+			/* Disable FW LLDP engine */
+			status = ice_cfg_lldp_mib_change(&pf->hw, false);
+
+			/* If unregistering for LLDP events fails, this is
+			 * not an error state, as there shouldn't be any
+			 * events to respond to.
+			 */
+			if (status)
+				dev_info(dev, "Failed to unreg for LLDP events\n");
+
+			/* The AQ call to stop the FW LLDP agent will generate
+			 * an error if the agent is already stopped.
+			 */
+			status = ice_aq_stop_lldp(&pf->hw, true, true, NULL);
+			if (status)
+				dev_warn(dev, "Fail to stop LLDP agent\n");
+			/* Use case for having the FW LLDP agent stopped
+			 * will likely not need DCB, so failure to init is
+			 * not a concern of ethtool
+			 */
+			status = ice_init_pf_dcb(pf, true);
+			if (status)
+				dev_warn(dev, "Fail to init DCB\n");
+
+			pf->dcbx_cap &= ~DCB_CAP_DCBX_LLD_MANAGED;
+			pf->dcbx_cap |= DCB_CAP_DCBX_HOST;
+		} else {
+			enum ice_status status;
+			bool dcbx_agent_status;
+
+			/* AQ command to start FW LLDP agent will return an
+			 * error if the agent is already started
+			 */
+			status = ice_aq_start_lldp(&pf->hw, true, NULL);
+			if (status)
+				dev_warn(dev, "Fail to start LLDP Agent\n");
+
+			/* AQ command to start FW DCBX agent will fail if
+			 * the agent is already started
+			 */
+			status = ice_aq_start_stop_dcbx(&pf->hw, true,
+							&dcbx_agent_status,
+							NULL);
+			if (status)
+				dev_dbg(dev, "Failed to start FW DCBX\n");
+
+			dev_info(dev, "FW DCBX agent is %s\n",
+				 dcbx_agent_status ? "ACTIVE" : "DISABLED");
+
+			/* Failure to configure MIB change or init DCB is not
+			 * relevant to ethtool.  Print notification that
+			 * registration/init failed but do not return error
+			 * state to ethtool
+			 */
+			status = ice_init_pf_dcb(pf, true);
+			if (status)
+				dev_dbg(dev, "Fail to init DCB\n");
+
+			/* Remove rule to direct LLDP packets to default VSI.
+			 * The FW LLDP engine will now be consuming them.
+			 */
+			ice_cfg_sw_lldp(vsi, false, false);
+
+			/* Register for MIB change events */
+			status = ice_cfg_lldp_mib_change(&pf->hw, true);
+			if (status)
+				dev_dbg(dev, "Fail to enable MIB change events\n");
+
+			pf->dcbx_cap &= ~DCB_CAP_DCBX_HOST;
+			pf->dcbx_cap |= DCB_CAP_DCBX_LLD_MANAGED;
+
+			ice_nway_reset(netdev);
+		}
+	}
+	if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) {
+		/* down and up VSI so that changes of Rx cfg are reflected. */
+		ice_down(vsi);
+		ice_up(vsi);
+	}
+	/* don't allow modification of this flag when a single VF is in
+	 * promiscuous mode because it's not supported
+	 */
+	if (test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, change_flags) &&
+	    ice_is_any_vf_in_promisc(pf)) {
+		dev_err(dev, "Changing vf-true-promisc-support flag while VF(s) are in promiscuous mode not supported\n");
+		/* toggle bit back to previous state */
+		change_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags);
+		ret = -EAGAIN;
+	}
+ethtool_exit:
+	clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
+	return ret;
+}
+
+static int ice_get_sset_count(struct net_device *netdev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		/* The number (and order) of strings reported *must* remain
+		 * constant for a given netdevice. This function must not
+		 * report a different number based on run time parameters
+		 * (such as the number of queues in use, or the setting of
+		 * a private ethtool flag). This is due to the nature of the
+		 * ethtool stats API.
+		 *
+		 * Userspace programs such as ethtool must make 3 separate
+		 * ioctl requests, one for size, one for the strings, and
+		 * finally one for the stats. Since these cross into
+		 * userspace, changes to the number or size could result in
+		 * undefined memory access or incorrect string<->value
+		 * correlations for statistics.
+		 *
+		 * Even if it appears to be safe, changes to the size or
+		 * order of strings will suffer from race conditions and are
+		 * not safe.
+		 */
+		return ICE_ALL_STATS_LEN(netdev);
+	case ETH_SS_TEST:
+		return ICE_TEST_LEN;
+	case ETH_SS_PRIV_FLAGS:
+		return ICE_PRIV_FLAG_ARRAY_SIZE;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void
+ice_get_ethtool_stats(struct net_device *netdev,
+		      struct ethtool_stats __always_unused *stats, u64 *data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_ring *ring;
+	unsigned int j;
+	int i = 0;
+	char *p;
+
+	ice_update_pf_stats(pf);
+	ice_update_vsi_stats(vsi);
+
+	for (j = 0; j < ICE_VSI_STATS_LEN; j++) {
+		p = (char *)vsi + ice_gstrings_vsi_stats[j].stat_offset;
+		data[i++] = (ice_gstrings_vsi_stats[j].sizeof_stat ==
+			     sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+
+	/* populate per queue stats */
+	rcu_read_lock();
+
+	ice_for_each_alloc_txq(vsi, j) {
+		ring = READ_ONCE(vsi->tx_rings[j]);
+		if (ring) {
+			data[i++] = ring->stats.pkts;
+			data[i++] = ring->stats.bytes;
+		} else {
+			data[i++] = 0;
+			data[i++] = 0;
+		}
+	}
+
+	ice_for_each_alloc_rxq(vsi, j) {
+		ring = READ_ONCE(vsi->rx_rings[j]);
+		if (ring) {
+			data[i++] = ring->stats.pkts;
+			data[i++] = ring->stats.bytes;
+		} else {
+			data[i++] = 0;
+			data[i++] = 0;
+		}
+	}
+
+	rcu_read_unlock();
+
+	if (vsi->type != ICE_VSI_PF)
+		return;
+
+	for (j = 0; j < ICE_PF_STATS_LEN; j++) {
+		p = (char *)pf + ice_gstrings_pf_stats[j].stat_offset;
+		data[i++] = (ice_gstrings_pf_stats[j].sizeof_stat ==
+			     sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+
+	for (j = 0; j < ICE_MAX_USER_PRIORITY; j++) {
+		data[i++] = pf->stats.priority_xon_tx[j];
+		data[i++] = pf->stats.priority_xoff_tx[j];
+	}
+
+	for (j = 0; j < ICE_MAX_USER_PRIORITY; j++) {
+		data[i++] = pf->stats.priority_xon_rx[j];
+		data[i++] = pf->stats.priority_xoff_rx[j];
+	}
+}
+
+#define ICE_PHY_TYPE_LOW_MASK_MIN_1G	(ICE_PHY_TYPE_LOW_100BASE_TX | \
+					 ICE_PHY_TYPE_LOW_100M_SGMII)
+
+#define ICE_PHY_TYPE_LOW_MASK_MIN_25G	(ICE_PHY_TYPE_LOW_MASK_MIN_1G | \
+					 ICE_PHY_TYPE_LOW_1000BASE_T | \
+					 ICE_PHY_TYPE_LOW_1000BASE_SX | \
+					 ICE_PHY_TYPE_LOW_1000BASE_LX | \
+					 ICE_PHY_TYPE_LOW_1000BASE_KX | \
+					 ICE_PHY_TYPE_LOW_1G_SGMII | \
+					 ICE_PHY_TYPE_LOW_2500BASE_T | \
+					 ICE_PHY_TYPE_LOW_2500BASE_X | \
+					 ICE_PHY_TYPE_LOW_2500BASE_KX | \
+					 ICE_PHY_TYPE_LOW_5GBASE_T | \
+					 ICE_PHY_TYPE_LOW_5GBASE_KR | \
+					 ICE_PHY_TYPE_LOW_10GBASE_T | \
+					 ICE_PHY_TYPE_LOW_10G_SFI_DA | \
+					 ICE_PHY_TYPE_LOW_10GBASE_SR | \
+					 ICE_PHY_TYPE_LOW_10GBASE_LR | \
+					 ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 | \
+					 ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC | \
+					 ICE_PHY_TYPE_LOW_10G_SFI_C2C)
+
+#define ICE_PHY_TYPE_LOW_MASK_100G	(ICE_PHY_TYPE_LOW_100GBASE_CR4 | \
+					 ICE_PHY_TYPE_LOW_100GBASE_SR4 | \
+					 ICE_PHY_TYPE_LOW_100GBASE_LR4 | \
+					 ICE_PHY_TYPE_LOW_100GBASE_KR4 | \
+					 ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC | \
+					 ICE_PHY_TYPE_LOW_100G_CAUI4 | \
+					 ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC | \
+					 ICE_PHY_TYPE_LOW_100G_AUI4 | \
+					 ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 | \
+					 ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 | \
+					 ICE_PHY_TYPE_LOW_100GBASE_CP2 | \
+					 ICE_PHY_TYPE_LOW_100GBASE_SR2 | \
+					 ICE_PHY_TYPE_LOW_100GBASE_DR)
+
+#define ICE_PHY_TYPE_HIGH_MASK_100G	(ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4 | \
+					 ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC |\
+					 ICE_PHY_TYPE_HIGH_100G_CAUI2 | \
+					 ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC | \
+					 ICE_PHY_TYPE_HIGH_100G_AUI2)
+
+/**
+ * ice_mask_min_supported_speeds
+ * @phy_types_high: PHY type high
+ * @phy_types_low: PHY type low to apply minimum supported speeds mask
+ *
+ * Apply minimum supported speeds mask to PHY type low. These are the speeds
+ * for ethtool supported link mode.
+ */
+static
+void ice_mask_min_supported_speeds(u64 phy_types_high, u64 *phy_types_low)
+{
+	/* if QSFP connection with 100G speed, minimum supported speed is 25G */
+	if (*phy_types_low & ICE_PHY_TYPE_LOW_MASK_100G ||
+	    phy_types_high & ICE_PHY_TYPE_HIGH_MASK_100G)
+		*phy_types_low &= ~ICE_PHY_TYPE_LOW_MASK_MIN_25G;
+	else
+		*phy_types_low &= ~ICE_PHY_TYPE_LOW_MASK_MIN_1G;
+}
+
+#define ice_ethtool_advertise_link_mode(aq_link_speed, ethtool_link_mode)    \
+	do {								     \
+		if (req_speeds & (aq_link_speed) ||			     \
+		    (!req_speeds &&					     \
+		     (adv_phy_type_lo & phy_type_mask_lo ||		     \
+		      adv_phy_type_hi & phy_type_mask_hi)))		     \
+			ethtool_link_ksettings_add_link_mode(ks, advertising,\
+							ethtool_link_mode);  \
+	} while (0)
+
+/**
+ * ice_phy_type_to_ethtool - convert the phy_types to ethtool link modes
+ * @netdev: network interface device structure
+ * @ks: ethtool link ksettings struct to fill out
+ */
+static void
+ice_phy_type_to_ethtool(struct net_device *netdev,
+			struct ethtool_link_ksettings *ks)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	u64 phy_type_mask_lo = 0;
+	u64 phy_type_mask_hi = 0;
+	u64 adv_phy_type_lo = 0;
+	u64 adv_phy_type_hi = 0;
+	u64 phy_types_high = 0;
+	u64 phy_types_low = 0;
+	u16 req_speeds;
+
+	req_speeds = vsi->port_info->phy.link_info.req_speeds;
+
+	/* Check if lenient mode is supported and enabled, or in strict mode.
+	 *
+	 * In lenient mode the Supported link modes are the PHY types without
+	 * media. The Advertising link mode is either 1. the user requested
+	 * speed, 2. the override PHY mask, or 3. the PHY types with media.
+	 *
+	 * In strict mode Supported link mode are the PHY type with media,
+	 * and Advertising link modes are the media PHY type or the speed
+	 * requested by user.
+	 */
+	if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags)) {
+		struct ice_link_default_override_tlv *ldo;
+
+		ldo = &pf->link_dflt_override;
+		phy_types_low = le64_to_cpu(pf->nvm_phy_type_lo);
+		phy_types_high = le64_to_cpu(pf->nvm_phy_type_hi);
+
+		ice_mask_min_supported_speeds(phy_types_high, &phy_types_low);
+
+		/* If override enabled and PHY mask set, then
+		 * Advertising link mode is the intersection of the PHY
+		 * types without media and the override PHY mask.
+		 */
+		if (ldo->options & ICE_LINK_OVERRIDE_EN &&
+		    (ldo->phy_type_low || ldo->phy_type_high)) {
+			adv_phy_type_lo =
+				le64_to_cpu(pf->nvm_phy_type_lo) &
+				ldo->phy_type_low;
+			adv_phy_type_hi =
+				le64_to_cpu(pf->nvm_phy_type_hi) &
+				ldo->phy_type_high;
+		}
+	} else {
+		phy_types_low = vsi->port_info->phy.phy_type_low;
+		phy_types_high = vsi->port_info->phy.phy_type_high;
+	}
+
+	/* If Advertising link mode PHY type is not using override PHY type,
+	 * then use PHY type with media.
+	 */
+	if (!adv_phy_type_lo && !adv_phy_type_hi) {
+		adv_phy_type_lo = vsi->port_info->phy.phy_type_low;
+		adv_phy_type_hi = vsi->port_info->phy.phy_type_high;
+	}
+
+	ethtool_link_ksettings_zero_link_mode(ks, supported);
+	ethtool_link_ksettings_zero_link_mode(ks, advertising);
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_100BASE_TX |
+			   ICE_PHY_TYPE_LOW_100M_SGMII;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100baseT_Full);
+
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100MB,
+						100baseT_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_T |
+			   ICE_PHY_TYPE_LOW_1G_SGMII;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseT_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB,
+						1000baseT_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_KX;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseKX_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB,
+						1000baseKX_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_SX |
+			   ICE_PHY_TYPE_LOW_1000BASE_LX;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseX_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB,
+						1000baseX_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_2500BASE_T;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     2500baseT_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_2500MB,
+						2500baseT_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_2500BASE_X |
+			   ICE_PHY_TYPE_LOW_2500BASE_KX;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     2500baseX_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_2500MB,
+						2500baseX_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_5GBASE_T |
+			   ICE_PHY_TYPE_LOW_5GBASE_KR;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     5000baseT_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_5GB,
+						5000baseT_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_T |
+			   ICE_PHY_TYPE_LOW_10G_SFI_DA |
+			   ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC |
+			   ICE_PHY_TYPE_LOW_10G_SFI_C2C;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB,
+						10000baseT_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_KR_CR1;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseKR_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB,
+						10000baseKR_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_SR;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseSR_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB,
+						10000baseSR_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_LR;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseLR_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB,
+						10000baseLR_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_T |
+			   ICE_PHY_TYPE_LOW_25GBASE_CR |
+			   ICE_PHY_TYPE_LOW_25GBASE_CR_S |
+			   ICE_PHY_TYPE_LOW_25GBASE_CR1 |
+			   ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC |
+			   ICE_PHY_TYPE_LOW_25G_AUI_C2C;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseCR_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB,
+						25000baseCR_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_SR |
+			   ICE_PHY_TYPE_LOW_25GBASE_LR;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseSR_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB,
+						25000baseSR_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_KR |
+			   ICE_PHY_TYPE_LOW_25GBASE_KR_S |
+			   ICE_PHY_TYPE_LOW_25GBASE_KR1;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseKR_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB,
+						25000baseKR_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_KR4;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseKR4_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB,
+						40000baseKR4_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_CR4 |
+			   ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC |
+			   ICE_PHY_TYPE_LOW_40G_XLAUI;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseCR4_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB,
+						40000baseCR4_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_SR4;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseSR4_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB,
+						40000baseSR4_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_LR4;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseLR4_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB,
+						40000baseLR4_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_CR2 |
+			   ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC |
+			   ICE_PHY_TYPE_LOW_50G_LAUI2 |
+			   ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC |
+			   ICE_PHY_TYPE_LOW_50G_AUI2 |
+			   ICE_PHY_TYPE_LOW_50GBASE_CP |
+			   ICE_PHY_TYPE_LOW_50GBASE_SR |
+			   ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC |
+			   ICE_PHY_TYPE_LOW_50G_AUI1;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     50000baseCR2_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB,
+						50000baseCR2_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_KR2 |
+			   ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     50000baseKR2_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB,
+						50000baseKR2_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_SR2 |
+			   ICE_PHY_TYPE_LOW_50GBASE_LR2 |
+			   ICE_PHY_TYPE_LOW_50GBASE_FR |
+			   ICE_PHY_TYPE_LOW_50GBASE_LR;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     50000baseSR2_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB,
+						50000baseSR2_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_CR4 |
+			   ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC |
+			   ICE_PHY_TYPE_LOW_100G_CAUI4 |
+			   ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC |
+			   ICE_PHY_TYPE_LOW_100G_AUI4 |
+			   ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 |
+			   ICE_PHY_TYPE_LOW_100GBASE_CP2;
+	phy_type_mask_hi = ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC |
+			   ICE_PHY_TYPE_HIGH_100G_CAUI2 |
+			   ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC |
+			   ICE_PHY_TYPE_HIGH_100G_AUI2;
+	if (phy_types_low & phy_type_mask_lo ||
+	    phy_types_high & phy_type_mask_hi) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseCR4_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
+						100000baseCR4_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_SR4 |
+			   ICE_PHY_TYPE_LOW_100GBASE_SR2;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseSR4_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
+						100000baseSR4_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_LR4 |
+			   ICE_PHY_TYPE_LOW_100GBASE_DR;
+	if (phy_types_low & phy_type_mask_lo) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseLR4_ER4_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
+						100000baseLR4_ER4_Full);
+	}
+
+	phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_KR4 |
+			   ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4;
+	phy_type_mask_hi = ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4;
+	if (phy_types_low & phy_type_mask_lo ||
+	    phy_types_high & phy_type_mask_hi) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100000baseKR4_Full);
+		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
+						100000baseKR4_Full);
+	}
+}
+
+#define TEST_SET_BITS_TIMEOUT	50
+#define TEST_SET_BITS_SLEEP_MAX	2000
+#define TEST_SET_BITS_SLEEP_MIN	1000
+
+/**
+ * ice_get_settings_link_up - Get Link settings for when link is up
+ * @ks: ethtool ksettings to fill in
+ * @netdev: network interface device structure
+ */
+static void
+ice_get_settings_link_up(struct ethtool_link_ksettings *ks,
+			 struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_port_info *pi = np->vsi->port_info;
+	struct ice_link_status *link_info;
+	struct ice_vsi *vsi = np->vsi;
+
+	link_info = &vsi->port_info->phy.link_info;
+
+	/* Get supported and advertised settings from PHY ability with media */
+	ice_phy_type_to_ethtool(netdev, ks);
+
+	switch (link_info->link_speed) {
+	case ICE_AQ_LINK_SPEED_100GB:
+		ks->base.speed = SPEED_100000;
+		break;
+	case ICE_AQ_LINK_SPEED_50GB:
+		ks->base.speed = SPEED_50000;
+		break;
+	case ICE_AQ_LINK_SPEED_40GB:
+		ks->base.speed = SPEED_40000;
+		break;
+	case ICE_AQ_LINK_SPEED_25GB:
+		ks->base.speed = SPEED_25000;
+		break;
+	case ICE_AQ_LINK_SPEED_20GB:
+		ks->base.speed = SPEED_20000;
+		break;
+	case ICE_AQ_LINK_SPEED_10GB:
+		ks->base.speed = SPEED_10000;
+		break;
+	case ICE_AQ_LINK_SPEED_5GB:
+		ks->base.speed = SPEED_5000;
+		break;
+	case ICE_AQ_LINK_SPEED_2500MB:
+		ks->base.speed = SPEED_2500;
+		break;
+	case ICE_AQ_LINK_SPEED_1000MB:
+		ks->base.speed = SPEED_1000;
+		break;
+	case ICE_AQ_LINK_SPEED_100MB:
+		ks->base.speed = SPEED_100;
+		break;
+	default:
+		netdev_info(netdev, "WARNING: Unrecognized link_speed (0x%x).\n",
+			    link_info->link_speed);
+		break;
+	}
+	ks->base.duplex = DUPLEX_FULL;
+
+	if (link_info->an_info & ICE_AQ_AN_COMPLETED)
+		ethtool_link_ksettings_add_link_mode(ks, lp_advertising,
+						     Autoneg);
+
+	/* Set flow control negotiated Rx/Tx pause */
+	switch (pi->fc.current_mode) {
+	case ICE_FC_FULL:
+		ethtool_link_ksettings_add_link_mode(ks, lp_advertising, Pause);
+		break;
+	case ICE_FC_TX_PAUSE:
+		ethtool_link_ksettings_add_link_mode(ks, lp_advertising, Pause);
+		ethtool_link_ksettings_add_link_mode(ks, lp_advertising,
+						     Asym_Pause);
+		break;
+	case ICE_FC_RX_PAUSE:
+		ethtool_link_ksettings_add_link_mode(ks, lp_advertising,
+						     Asym_Pause);
+		break;
+	case ICE_FC_PFC:
+	default:
+		ethtool_link_ksettings_del_link_mode(ks, lp_advertising, Pause);
+		ethtool_link_ksettings_del_link_mode(ks, lp_advertising,
+						     Asym_Pause);
+		break;
+	}
+}
+
+/**
+ * ice_get_settings_link_down - Get the Link settings when link is down
+ * @ks: ethtool ksettings to fill in
+ * @netdev: network interface device structure
+ *
+ * Reports link settings that can be determined when link is down
+ */
+static void
+ice_get_settings_link_down(struct ethtool_link_ksettings *ks,
+			   struct net_device *netdev)
+{
+	/* link is down and the driver needs to fall back on
+	 * supported PHY types to figure out what info to display
+	 */
+	ice_phy_type_to_ethtool(netdev, ks);
+
+	/* With no link, speed and duplex are unknown */
+	ks->base.speed = SPEED_UNKNOWN;
+	ks->base.duplex = DUPLEX_UNKNOWN;
+}
+
+/**
+ * ice_get_link_ksettings - Get Link Speed and Duplex settings
+ * @netdev: network interface device structure
+ * @ks: ethtool ksettings
+ *
+ * Reports speed/duplex settings based on media_type
+ */
+static int
+ice_get_link_ksettings(struct net_device *netdev,
+		       struct ethtool_link_ksettings *ks)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_aqc_get_phy_caps_data *caps;
+	struct ice_link_status *hw_link_info;
+	struct ice_vsi *vsi = np->vsi;
+	enum ice_status status;
+	int err = 0;
+
+	ethtool_link_ksettings_zero_link_mode(ks, supported);
+	ethtool_link_ksettings_zero_link_mode(ks, advertising);
+	ethtool_link_ksettings_zero_link_mode(ks, lp_advertising);
+	hw_link_info = &vsi->port_info->phy.link_info;
+
+	/* set speed and duplex */
+	if (hw_link_info->link_info & ICE_AQ_LINK_UP)
+		ice_get_settings_link_up(ks, netdev);
+	else
+		ice_get_settings_link_down(ks, netdev);
+
+	/* set autoneg settings */
+	ks->base.autoneg = (hw_link_info->an_info & ICE_AQ_AN_COMPLETED) ?
+		AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+	/* set media type settings */
+	switch (vsi->port_info->phy.media_type) {
+	case ICE_MEDIA_FIBER:
+		ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+		ks->base.port = PORT_FIBRE;
+		break;
+	case ICE_MEDIA_BASET:
+		ethtool_link_ksettings_add_link_mode(ks, supported, TP);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, TP);
+		ks->base.port = PORT_TP;
+		break;
+	case ICE_MEDIA_BACKPLANE:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Backplane);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Backplane);
+		ks->base.port = PORT_NONE;
+		break;
+	case ICE_MEDIA_DA:
+		ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE);
+		ks->base.port = PORT_DA;
+		break;
+	default:
+		ks->base.port = PORT_OTHER;
+		break;
+	}
+
+	/* flow control is symmetric and always supported */
+	ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
+
+	caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		return -ENOMEM;
+
+	status = ice_aq_get_phy_caps(vsi->port_info, false,
+				     ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
+	if (status) {
+		err = -EIO;
+		goto done;
+	}
+
+	/* Set the advertised flow control based on the PHY capability */
+	if ((caps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) &&
+	    (caps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)) {
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Asym_Pause);
+	} else if (caps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) {
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Asym_Pause);
+	} else if (caps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE) {
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Asym_Pause);
+	} else {
+		ethtool_link_ksettings_del_link_mode(ks, advertising, Pause);
+		ethtool_link_ksettings_del_link_mode(ks, advertising,
+						     Asym_Pause);
+	}
+
+	/* Set advertised FEC modes based on PHY capability */
+	ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_NONE);
+
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     FEC_BASER);
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
+		ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
+
+	status = ice_aq_get_phy_caps(vsi->port_info, false,
+				     ICE_AQC_REPORT_TOPO_CAP_MEDIA, caps, NULL);
+	if (status) {
+		err = -EIO;
+		goto done;
+	}
+
+	/* Set supported FEC modes based on PHY capability */
+	ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE);
+
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN)
+		ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER);
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN)
+		ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
+
+	/* Set supported and advertised autoneg */
+	if (ice_is_phy_caps_an_enabled(caps)) {
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+	}
+
+done:
+	kfree(caps);
+	return err;
+}
+
+/**
+ * ice_ksettings_find_adv_link_speed - Find advertising link speed
+ * @ks: ethtool ksettings
+ */
+static u16
+ice_ksettings_find_adv_link_speed(const struct ethtool_link_ksettings *ks)
+{
+	u16 adv_link_speed = 0;
+
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100baseT_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_100MB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseX_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_1000MB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseT_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseKX_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_1000MB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  2500baseT_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_2500MB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  2500baseX_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_2500MB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  5000baseT_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_5GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseT_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseKR_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_10GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseSR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseLR_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_10GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  25000baseCR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  25000baseSR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  25000baseKR_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_25GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseCR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseSR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseLR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseKR4_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_40GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  50000baseCR2_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  50000baseKR2_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_50GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  50000baseSR2_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_50GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100000baseCR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100000baseSR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100000baseLR4_ER4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100000baseKR4_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_100GB;
+
+	return adv_link_speed;
+}
+
+/**
+ * ice_setup_autoneg
+ * @p: port info
+ * @ks: ethtool_link_ksettings
+ * @config: configuration that will be sent down to FW
+ * @autoneg_enabled: autonegotiation is enabled or not
+ * @autoneg_changed: will there a change in autonegotiation
+ * @netdev: network interface device structure
+ *
+ * Setup PHY autonegotiation feature
+ */
+static int
+ice_setup_autoneg(struct ice_port_info *p, struct ethtool_link_ksettings *ks,
+		  struct ice_aqc_set_phy_cfg_data *config,
+		  u8 autoneg_enabled, u8 *autoneg_changed,
+		  struct net_device *netdev)
+{
+	int err = 0;
+
+	*autoneg_changed = 0;
+
+	/* Check autoneg */
+	if (autoneg_enabled == AUTONEG_ENABLE) {
+		/* If autoneg was not already enabled */
+		if (!(p->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)) {
+			/* If autoneg is not supported, return error */
+			if (!ethtool_link_ksettings_test_link_mode(ks,
+								   supported,
+								   Autoneg)) {
+				netdev_info(netdev, "Autoneg not supported on this phy.\n");
+				err = -EINVAL;
+			} else {
+				/* Autoneg is allowed to change */
+				config->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+				*autoneg_changed = 1;
+			}
+		}
+	} else {
+		/* If autoneg is currently enabled */
+		if (p->phy.link_info.an_info & ICE_AQ_AN_COMPLETED) {
+			/* If autoneg is supported 10GBASE_T is the only PHY
+			 * that can disable it, so otherwise return error
+			 */
+			if (ethtool_link_ksettings_test_link_mode(ks,
+								  supported,
+								  Autoneg)) {
+				netdev_info(netdev, "Autoneg cannot be disabled on this phy\n");
+				err = -EINVAL;
+			} else {
+				/* Autoneg is allowed to change */
+				config->caps &= ~ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+				*autoneg_changed = 1;
+			}
+		}
+	}
+
+	return err;
+}
+
+/**
+ * ice_set_phy_type_from_speed - set phy_types based on speeds
+ * and advertised modes
+ * @ks: ethtool link ksettings struct
+ * @phy_type_low: pointer to the lower part of phy_type
+ * @phy_type_high: pointer to the higher part of phy_type
+ * @adv_link_speed: targeted link speeds bitmap
+ */
+static void
+ice_set_phy_type_from_speed(const struct ethtool_link_ksettings *ks,
+			    u64 *phy_type_low, u64 *phy_type_high,
+			    u16 adv_link_speed)
+{
+	/* Handle 1000M speed in a special way because ice_update_phy_type
+	 * enables all link modes, but having mixed copper and optical
+	 * standards is not supported.
+	 */
+	adv_link_speed &= ~ICE_AQ_LINK_SPEED_1000MB;
+
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseT_Full))
+		*phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_T |
+				 ICE_PHY_TYPE_LOW_1G_SGMII;
+
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseKX_Full))
+		*phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_KX;
+
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseX_Full))
+		*phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_SX |
+				 ICE_PHY_TYPE_LOW_1000BASE_LX;
+
+	ice_update_phy_type(phy_type_low, phy_type_high, adv_link_speed);
+}
+
+/**
+ * ice_set_link_ksettings - Set Speed and Duplex
+ * @netdev: network interface device structure
+ * @ks: ethtool ksettings
+ *
+ * Set speed/duplex per media_types advertised/forced
+ */
+static int
+ice_set_link_ksettings(struct net_device *netdev,
+		       const struct ethtool_link_ksettings *ks)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ethtool_link_ksettings safe_ks, copy_ks;
+	u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT;
+	struct ice_aqc_get_phy_caps_data *phy_caps;
+	struct ice_aqc_set_phy_cfg_data config;
+	u16 adv_link_speed, curr_link_speed;
+	struct ice_pf *pf = np->vsi->back;
+	struct ice_port_info *pi;
+	u8 autoneg_changed = 0;
+	enum ice_status status;
+	u64 phy_type_high = 0;
+	u64 phy_type_low = 0;
+	int err = 0;
+	bool linkup;
+
+	pi = np->vsi->port_info;
+
+	if (!pi)
+		return -EOPNOTSUPP;
+
+	if (pi->phy.media_type != ICE_MEDIA_BASET &&
+	    pi->phy.media_type != ICE_MEDIA_FIBER &&
+	    pi->phy.media_type != ICE_MEDIA_BACKPLANE &&
+	    pi->phy.media_type != ICE_MEDIA_DA &&
+	    pi->phy.link_info.link_info & ICE_AQ_LINK_UP)
+		return -EOPNOTSUPP;
+
+	phy_caps = kzalloc(sizeof(*phy_caps), GFP_KERNEL);
+	if (!phy_caps)
+		return -ENOMEM;
+
+	/* Get the PHY capabilities based on media */
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
+				     phy_caps, NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	/* copy the ksettings to copy_ks to avoid modifying the original */
+	memcpy(&copy_ks, ks, sizeof(copy_ks));
+
+	/* save autoneg out of ksettings */
+	autoneg = copy_ks.base.autoneg;
+
+	memset(&safe_ks, 0, sizeof(safe_ks));
+
+	/* Get link modes supported by hardware.*/
+	ice_phy_type_to_ethtool(netdev, &safe_ks);
+
+	/* and check against modes requested by user.
+	 * Return an error if unsupported mode was set.
+	 */
+	if (!bitmap_subset(copy_ks.link_modes.advertising,
+			   safe_ks.link_modes.supported,
+			   __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+		if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags))
+			netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
+		err = -EINVAL;
+		goto done;
+	}
+
+	/* get our own copy of the bits to check against */
+	memset(&safe_ks, 0, sizeof(safe_ks));
+	safe_ks.base.cmd = copy_ks.base.cmd;
+	safe_ks.base.link_mode_masks_nwords =
+		copy_ks.base.link_mode_masks_nwords;
+	ice_get_link_ksettings(netdev, &safe_ks);
+
+	/* set autoneg back to what it currently is */
+	copy_ks.base.autoneg = safe_ks.base.autoneg;
+	/* we don't compare the speed */
+	copy_ks.base.speed = safe_ks.base.speed;
+
+	/* If copy_ks.base and safe_ks.base are not the same now, then they are
+	 * trying to set something that we do not support.
+	 */
+	if (memcmp(&copy_ks.base, &safe_ks.base, sizeof(copy_ks.base))) {
+		err = -EOPNOTSUPP;
+		goto done;
+	}
+
+	while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
+		timeout--;
+		if (!timeout) {
+			err = -EBUSY;
+			goto done;
+		}
+		usleep_range(TEST_SET_BITS_SLEEP_MIN, TEST_SET_BITS_SLEEP_MAX);
+	}
+
+	/* Copy the current user PHY configuration. The current user PHY
+	 * configuration is initialized during probe from PHY capabilities
+	 * software mode, and updated on set PHY configuration.
+	 */
+	memcpy(&config, &pi->phy.curr_user_phy_cfg, sizeof(config));
+
+	config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+
+	/* Check autoneg */
+	err = ice_setup_autoneg(pi, &safe_ks, &config, autoneg, &autoneg_changed,
+				netdev);
+
+	if (err)
+		goto done;
+
+	/* Call to get the current link speed */
+	pi->phy.get_link_info = true;
+	status = ice_get_link_status(pi, &linkup);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	curr_link_speed = pi->phy.curr_user_speed_req;
+	adv_link_speed = ice_ksettings_find_adv_link_speed(ks);
+
+	/* If speed didn't get set, set it to what it currently is.
+	 * This is needed because if advertise is 0 (as it is when autoneg
+	 * is disabled) then speed won't get set.
+	 */
+	if (!adv_link_speed)
+		adv_link_speed = curr_link_speed;
+
+	/* Convert the advertise link speeds to their corresponded PHY_TYPE */
+	ice_set_phy_type_from_speed(ks, &phy_type_low, &phy_type_high,
+				    adv_link_speed);
+
+	if (!autoneg_changed && adv_link_speed == curr_link_speed) {
+		netdev_info(netdev, "Nothing changed, exiting without setting anything.\n");
+		goto done;
+	}
+
+	/* save the requested speeds */
+	pi->phy.link_info.req_speeds = adv_link_speed;
+
+	/* set link and auto negotiation so changes take effect */
+	config.caps |= ICE_AQ_PHY_ENA_LINK;
+
+	/* check if there is a PHY type for the requested advertised speed */
+	if (!(phy_type_low || phy_type_high)) {
+		netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
+		err = -EAGAIN;
+		goto done;
+	}
+
+	/* intersect requested advertised speed PHY types with media PHY types
+	 * for set PHY configuration
+	 */
+	config.phy_type_high = cpu_to_le64(phy_type_high) &
+			phy_caps->phy_type_high;
+	config.phy_type_low = cpu_to_le64(phy_type_low) &
+			phy_caps->phy_type_low;
+
+	if (!(config.phy_type_high || config.phy_type_low)) {
+		/* If there is no intersection and lenient mode is enabled, then
+		 * intersect the requested advertised speed with NVM media type
+		 * PHY types.
+		 */
+		if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags)) {
+			config.phy_type_high = cpu_to_le64(phy_type_high) &
+					       pf->nvm_phy_type_hi;
+			config.phy_type_low = cpu_to_le64(phy_type_low) &
+					      pf->nvm_phy_type_lo;
+		} else {
+			netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
+			err = -EAGAIN;
+			goto done;
+		}
+	}
+
+	/* If link is up put link down */
+	if (pi->phy.link_info.link_info & ICE_AQ_LINK_UP) {
+		/* Tell the OS link is going down, the link will go
+		 * back up when fw says it is ready asynchronously
+		 */
+		ice_print_link_msg(np->vsi, false);
+		netif_carrier_off(netdev);
+		netif_tx_stop_all_queues(netdev);
+	}
+
+	/* make the aq call */
+	status = ice_aq_set_phy_cfg(&pf->hw, pi, &config, NULL);
+	if (status) {
+		netdev_info(netdev, "Set phy config failed,\n");
+		err = -EAGAIN;
+		goto done;
+	}
+
+	/* Save speed request */
+	pi->phy.curr_user_speed_req = adv_link_speed;
+done:
+	kfree(phy_caps);
+	clear_bit(__ICE_CFG_BUSY, pf->state);
+
+	return err;
+}
+
+/**
+ * ice_parse_hdrs - parses headers from RSS hash input
+ * @nfc: ethtool rxnfc command
+ *
+ * This function parses the rxnfc command and returns intended
+ * header types for RSS configuration
+ */
+static u32 ice_parse_hdrs(struct ethtool_rxnfc *nfc)
+{
+	u32 hdrs = ICE_FLOW_SEG_HDR_NONE;
+
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+		hdrs |= ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV4;
+		break;
+	case UDP_V4_FLOW:
+		hdrs |= ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV4;
+		break;
+	case SCTP_V4_FLOW:
+		hdrs |= ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV4;
+		break;
+	case TCP_V6_FLOW:
+		hdrs |= ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV6;
+		break;
+	case UDP_V6_FLOW:
+		hdrs |= ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV6;
+		break;
+	case SCTP_V6_FLOW:
+		hdrs |= ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV6;
+		break;
+	default:
+		break;
+	}
+	return hdrs;
+}
+
+#define ICE_FLOW_HASH_FLD_IPV4_SA	BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)
+#define ICE_FLOW_HASH_FLD_IPV6_SA	BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)
+#define ICE_FLOW_HASH_FLD_IPV4_DA	BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)
+#define ICE_FLOW_HASH_FLD_IPV6_DA	BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)
+#define ICE_FLOW_HASH_FLD_TCP_SRC_PORT	BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)
+#define ICE_FLOW_HASH_FLD_TCP_DST_PORT	BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)
+#define ICE_FLOW_HASH_FLD_UDP_SRC_PORT	BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)
+#define ICE_FLOW_HASH_FLD_UDP_DST_PORT	BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)
+#define ICE_FLOW_HASH_FLD_SCTP_SRC_PORT	\
+	BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)
+#define ICE_FLOW_HASH_FLD_SCTP_DST_PORT	\
+	BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)
+
+/**
+ * ice_parse_hash_flds - parses hash fields from RSS hash input
+ * @nfc: ethtool rxnfc command
+ *
+ * This function parses the rxnfc command and returns intended
+ * hash fields for RSS configuration
+ */
+static u64 ice_parse_hash_flds(struct ethtool_rxnfc *nfc)
+{
+	u64 hfld = ICE_HASH_INVALID;
+
+	if (nfc->data & RXH_IP_SRC || nfc->data & RXH_IP_DST) {
+		switch (nfc->flow_type) {
+		case TCP_V4_FLOW:
+		case UDP_V4_FLOW:
+		case SCTP_V4_FLOW:
+			if (nfc->data & RXH_IP_SRC)
+				hfld |= ICE_FLOW_HASH_FLD_IPV4_SA;
+			if (nfc->data & RXH_IP_DST)
+				hfld |= ICE_FLOW_HASH_FLD_IPV4_DA;
+			break;
+		case TCP_V6_FLOW:
+		case UDP_V6_FLOW:
+		case SCTP_V6_FLOW:
+			if (nfc->data & RXH_IP_SRC)
+				hfld |= ICE_FLOW_HASH_FLD_IPV6_SA;
+			if (nfc->data & RXH_IP_DST)
+				hfld |= ICE_FLOW_HASH_FLD_IPV6_DA;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (nfc->data & RXH_L4_B_0_1 || nfc->data & RXH_L4_B_2_3) {
+		switch (nfc->flow_type) {
+		case TCP_V4_FLOW:
+		case TCP_V6_FLOW:
+			if (nfc->data & RXH_L4_B_0_1)
+				hfld |= ICE_FLOW_HASH_FLD_TCP_SRC_PORT;
+			if (nfc->data & RXH_L4_B_2_3)
+				hfld |= ICE_FLOW_HASH_FLD_TCP_DST_PORT;
+			break;
+		case UDP_V4_FLOW:
+		case UDP_V6_FLOW:
+			if (nfc->data & RXH_L4_B_0_1)
+				hfld |= ICE_FLOW_HASH_FLD_UDP_SRC_PORT;
+			if (nfc->data & RXH_L4_B_2_3)
+				hfld |= ICE_FLOW_HASH_FLD_UDP_DST_PORT;
+			break;
+		case SCTP_V4_FLOW:
+		case SCTP_V6_FLOW:
+			if (nfc->data & RXH_L4_B_0_1)
+				hfld |= ICE_FLOW_HASH_FLD_SCTP_SRC_PORT;
+			if (nfc->data & RXH_L4_B_2_3)
+				hfld |= ICE_FLOW_HASH_FLD_SCTP_DST_PORT;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return hfld;
+}
+
+/**
+ * ice_set_rss_hash_opt - Enable/Disable flow types for RSS hash
+ * @vsi: the VSI being configured
+ * @nfc: ethtool rxnfc command
+ *
+ * Returns Success if the flow input set is supported.
+ */
+static int
+ice_set_rss_hash_opt(struct ice_vsi *vsi, struct ethtool_rxnfc *nfc)
+{
+	struct ice_pf *pf = vsi->back;
+	enum ice_status status;
+	struct device *dev;
+	u64 hashed_flds;
+	u32 hdrs;
+
+	dev = ice_pf_to_dev(pf);
+	if (ice_is_safe_mode(pf)) {
+		dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n",
+			vsi->vsi_num);
+		return -EINVAL;
+	}
+
+	hashed_flds = ice_parse_hash_flds(nfc);
+	if (hashed_flds == ICE_HASH_INVALID) {
+		dev_dbg(dev, "Invalid hash fields, vsi num = %d\n",
+			vsi->vsi_num);
+		return -EINVAL;
+	}
+
+	hdrs = ice_parse_hdrs(nfc);
+	if (hdrs == ICE_FLOW_SEG_HDR_NONE) {
+		dev_dbg(dev, "Header type is not valid, vsi num = %d\n",
+			vsi->vsi_num);
+		return -EINVAL;
+	}
+
+	status = ice_add_rss_cfg(&pf->hw, vsi->idx, hashed_flds, hdrs);
+	if (status) {
+		dev_dbg(dev, "ice_add_rss_cfg failed, vsi num = %d, error = %s\n",
+			vsi->vsi_num, ice_stat_str(status));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_rss_hash_opt - Retrieve hash fields for a given flow-type
+ * @vsi: the VSI being configured
+ * @nfc: ethtool rxnfc command
+ */
+static void
+ice_get_rss_hash_opt(struct ice_vsi *vsi, struct ethtool_rxnfc *nfc)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	u64 hash_flds;
+	u32 hdrs;
+
+	dev = ice_pf_to_dev(pf);
+
+	nfc->data = 0;
+	if (ice_is_safe_mode(pf)) {
+		dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n",
+			vsi->vsi_num);
+		return;
+	}
+
+	hdrs = ice_parse_hdrs(nfc);
+	if (hdrs == ICE_FLOW_SEG_HDR_NONE) {
+		dev_dbg(dev, "Header type is not valid, vsi num = %d\n",
+			vsi->vsi_num);
+		return;
+	}
+
+	hash_flds = ice_get_rss_cfg(&pf->hw, vsi->idx, hdrs);
+	if (hash_flds == ICE_HASH_INVALID) {
+		dev_dbg(dev, "No hash fields found for the given header type, vsi num = %d\n",
+			vsi->vsi_num);
+		return;
+	}
+
+	if (hash_flds & ICE_FLOW_HASH_FLD_IPV4_SA ||
+	    hash_flds & ICE_FLOW_HASH_FLD_IPV6_SA)
+		nfc->data |= (u64)RXH_IP_SRC;
+
+	if (hash_flds & ICE_FLOW_HASH_FLD_IPV4_DA ||
+	    hash_flds & ICE_FLOW_HASH_FLD_IPV6_DA)
+		nfc->data |= (u64)RXH_IP_DST;
+
+	if (hash_flds & ICE_FLOW_HASH_FLD_TCP_SRC_PORT ||
+	    hash_flds & ICE_FLOW_HASH_FLD_UDP_SRC_PORT ||
+	    hash_flds & ICE_FLOW_HASH_FLD_SCTP_SRC_PORT)
+		nfc->data |= (u64)RXH_L4_B_0_1;
+
+	if (hash_flds & ICE_FLOW_HASH_FLD_TCP_DST_PORT ||
+	    hash_flds & ICE_FLOW_HASH_FLD_UDP_DST_PORT ||
+	    hash_flds & ICE_FLOW_HASH_FLD_SCTP_DST_PORT)
+		nfc->data |= (u64)RXH_L4_B_2_3;
+}
+
+/**
+ * ice_set_rxnfc - command to set Rx flow rules.
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ *
+ * Returns 0 for success and negative values for errors
+ */
+static int ice_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		return ice_add_fdir_ethtool(vsi, cmd);
+	case ETHTOOL_SRXCLSRLDEL:
+		return ice_del_fdir_ethtool(vsi, cmd);
+	case ETHTOOL_SRXFH:
+		return ice_set_rss_hash_opt(vsi, cmd);
+	default:
+		break;
+	}
+	return -EOPNOTSUPP;
+}
+
+/**
+ * ice_get_rxnfc - command to get Rx flow classification rules
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ * @rule_locs: buffer to rturn Rx flow classification rules
+ *
+ * Returns Success if the command is supported.
+ */
+static int
+ice_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+	      u32 __always_unused *rule_locs)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	int ret = -EOPNOTSUPP;
+	struct ice_hw *hw;
+
+	hw = &vsi->back->hw;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = vsi->rss_size;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		cmd->rule_cnt = hw->fdir_active_fltr;
+		/* report total rule count */
+		cmd->data = ice_get_fdir_cnt_all(hw);
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		ret = ice_get_ethtool_fdir_entry(hw, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		ret = ice_get_fdir_fltr_ids(hw, cmd, (u32 *)rule_locs);
+		break;
+	case ETHTOOL_GRXFH:
+		ice_get_rss_hash_opt(vsi, cmd);
+		ret = 0;
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static void
+ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	ring->rx_max_pending = ICE_MAX_NUM_DESC;
+	ring->tx_max_pending = ICE_MAX_NUM_DESC;
+	ring->rx_pending = vsi->rx_rings[0]->count;
+	ring->tx_pending = vsi->tx_rings[0]->count;
+
+	/* Rx mini and jumbo rings are not supported */
+	ring->rx_mini_max_pending = 0;
+	ring->rx_jumbo_max_pending = 0;
+	ring->rx_mini_pending = 0;
+	ring->rx_jumbo_pending = 0;
+}
+
+static int
+ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
+{
+	struct ice_ring *tx_rings = NULL, *rx_rings = NULL;
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_ring *xdp_rings = NULL;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	int i, timeout = 50, err = 0;
+	u16 new_rx_cnt, new_tx_cnt;
+
+	if (ring->tx_pending > ICE_MAX_NUM_DESC ||
+	    ring->tx_pending < ICE_MIN_NUM_DESC ||
+	    ring->rx_pending > ICE_MAX_NUM_DESC ||
+	    ring->rx_pending < ICE_MIN_NUM_DESC) {
+		netdev_err(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d] (increment %d)\n",
+			   ring->tx_pending, ring->rx_pending,
+			   ICE_MIN_NUM_DESC, ICE_MAX_NUM_DESC,
+			   ICE_REQ_DESC_MULTIPLE);
+		return -EINVAL;
+	}
+
+	new_tx_cnt = ALIGN(ring->tx_pending, ICE_REQ_DESC_MULTIPLE);
+	if (new_tx_cnt != ring->tx_pending)
+		netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n",
+			    new_tx_cnt);
+	new_rx_cnt = ALIGN(ring->rx_pending, ICE_REQ_DESC_MULTIPLE);
+	if (new_rx_cnt != ring->rx_pending)
+		netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n",
+			    new_rx_cnt);
+
+	/* if nothing to do return success */
+	if (new_tx_cnt == vsi->tx_rings[0]->count &&
+	    new_rx_cnt == vsi->rx_rings[0]->count) {
+		netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n");
+		return 0;
+	}
+
+	/* If there is a AF_XDP UMEM attached to any of Rx rings,
+	 * disallow changing the number of descriptors -- regardless
+	 * if the netdev is running or not.
+	 */
+	if (ice_xsk_any_rx_ring_ena(vsi))
+		return -EBUSY;
+
+	while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+		usleep_range(1000, 2000);
+	}
+
+	/* set for the next time the netdev is started */
+	if (!netif_running(vsi->netdev)) {
+		for (i = 0; i < vsi->alloc_txq; i++)
+			vsi->tx_rings[i]->count = new_tx_cnt;
+		for (i = 0; i < vsi->alloc_rxq; i++)
+			vsi->rx_rings[i]->count = new_rx_cnt;
+		if (ice_is_xdp_ena_vsi(vsi))
+			for (i = 0; i < vsi->num_xdp_txq; i++)
+				vsi->xdp_rings[i]->count = new_tx_cnt;
+		vsi->num_tx_desc = (u16)new_tx_cnt;
+		vsi->num_rx_desc = (u16)new_rx_cnt;
+		netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n");
+		goto done;
+	}
+
+	if (new_tx_cnt == vsi->tx_rings[0]->count)
+		goto process_rx;
+
+	/* alloc updated Tx resources */
+	netdev_info(netdev, "Changing Tx descriptor count from %d to %d\n",
+		    vsi->tx_rings[0]->count, new_tx_cnt);
+
+	tx_rings = kcalloc(vsi->num_txq, sizeof(*tx_rings), GFP_KERNEL);
+	if (!tx_rings) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	ice_for_each_txq(vsi, i) {
+		/* clone ring and setup updated count */
+		tx_rings[i] = *vsi->tx_rings[i];
+		tx_rings[i].count = new_tx_cnt;
+		tx_rings[i].desc = NULL;
+		tx_rings[i].tx_buf = NULL;
+		err = ice_setup_tx_ring(&tx_rings[i]);
+		if (err) {
+			while (i--)
+				ice_clean_tx_ring(&tx_rings[i]);
+			kfree(tx_rings);
+			goto done;
+		}
+	}
+
+	if (!ice_is_xdp_ena_vsi(vsi))
+		goto process_rx;
+
+	/* alloc updated XDP resources */
+	netdev_info(netdev, "Changing XDP descriptor count from %d to %d\n",
+		    vsi->xdp_rings[0]->count, new_tx_cnt);
+
+	xdp_rings = kcalloc(vsi->num_xdp_txq, sizeof(*xdp_rings), GFP_KERNEL);
+	if (!xdp_rings) {
+		err = -ENOMEM;
+		goto free_tx;
+	}
+
+	for (i = 0; i < vsi->num_xdp_txq; i++) {
+		/* clone ring and setup updated count */
+		xdp_rings[i] = *vsi->xdp_rings[i];
+		xdp_rings[i].count = new_tx_cnt;
+		xdp_rings[i].desc = NULL;
+		xdp_rings[i].tx_buf = NULL;
+		err = ice_setup_tx_ring(&xdp_rings[i]);
+		if (err) {
+			while (i--)
+				ice_clean_tx_ring(&xdp_rings[i]);
+			kfree(xdp_rings);
+			goto free_tx;
+		}
+		ice_set_ring_xdp(&xdp_rings[i]);
+	}
+
+process_rx:
+	if (new_rx_cnt == vsi->rx_rings[0]->count)
+		goto process_link;
+
+	/* alloc updated Rx resources */
+	netdev_info(netdev, "Changing Rx descriptor count from %d to %d\n",
+		    vsi->rx_rings[0]->count, new_rx_cnt);
+
+	rx_rings = kcalloc(vsi->num_rxq, sizeof(*rx_rings), GFP_KERNEL);
+	if (!rx_rings) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	ice_for_each_rxq(vsi, i) {
+		/* clone ring and setup updated count */
+		rx_rings[i] = *vsi->rx_rings[i];
+		rx_rings[i].count = new_rx_cnt;
+		rx_rings[i].desc = NULL;
+		rx_rings[i].rx_buf = NULL;
+		/* this is to allow wr32 to have something to write to
+		 * during early allocation of Rx buffers
+		 */
+		rx_rings[i].tail = vsi->back->hw.hw_addr + PRTGEN_STATUS;
+
+		err = ice_setup_rx_ring(&rx_rings[i]);
+		if (err)
+			goto rx_unwind;
+
+		/* allocate Rx buffers */
+		err = ice_alloc_rx_bufs(&rx_rings[i],
+					ICE_DESC_UNUSED(&rx_rings[i]));
+rx_unwind:
+		if (err) {
+			while (i) {
+				i--;
+				ice_free_rx_ring(&rx_rings[i]);
+			}
+			kfree(rx_rings);
+			err = -ENOMEM;
+			goto free_tx;
+		}
+	}
+
+process_link:
+	/* Bring interface down, copy in the new ring info, then restore the
+	 * interface. if VSI is up, bring it down and then back up
+	 */
+	if (!test_and_set_bit(__ICE_DOWN, vsi->state)) {
+		ice_down(vsi);
+
+		if (tx_rings) {
+			ice_for_each_txq(vsi, i) {
+				ice_free_tx_ring(vsi->tx_rings[i]);
+				*vsi->tx_rings[i] = tx_rings[i];
+			}
+			kfree(tx_rings);
+		}
+
+		if (rx_rings) {
+			ice_for_each_rxq(vsi, i) {
+				ice_free_rx_ring(vsi->rx_rings[i]);
+				/* copy the real tail offset */
+				rx_rings[i].tail = vsi->rx_rings[i]->tail;
+				/* this is to fake out the allocation routine
+				 * into thinking it has to realloc everything
+				 * but the recycling logic will let us re-use
+				 * the buffers allocated above
+				 */
+				rx_rings[i].next_to_use = 0;
+				rx_rings[i].next_to_clean = 0;
+				rx_rings[i].next_to_alloc = 0;
+				*vsi->rx_rings[i] = rx_rings[i];
+			}
+			kfree(rx_rings);
+		}
+
+		if (xdp_rings) {
+			for (i = 0; i < vsi->num_xdp_txq; i++) {
+				ice_free_tx_ring(vsi->xdp_rings[i]);
+				*vsi->xdp_rings[i] = xdp_rings[i];
+			}
+			kfree(xdp_rings);
+		}
+
+		vsi->num_tx_desc = new_tx_cnt;
+		vsi->num_rx_desc = new_rx_cnt;
+		ice_up(vsi);
+	}
+	goto done;
+
+free_tx:
+	/* error cleanup if the Rx allocations failed after getting Tx */
+	if (tx_rings) {
+		ice_for_each_txq(vsi, i)
+			ice_free_tx_ring(&tx_rings[i]);
+		kfree(tx_rings);
+	}
+
+done:
+	clear_bit(__ICE_CFG_BUSY, pf->state);
+	return err;
+}
+
+/**
+ * ice_get_pauseparam - Get Flow Control status
+ * @netdev: network interface device structure
+ * @pause: ethernet pause (flow control) parameters
+ *
+ * Get requested flow control status from PHY capability.
+ * If autoneg is true, then ethtool will send the ETHTOOL_GSET ioctl which
+ * is handled by ice_get_link_ksettings. ice_get_link_ksettings will report
+ * the negotiated Rx/Tx pause via lp_advertising.
+ */
+static void
+ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_port_info *pi = np->vsi->port_info;
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	struct ice_dcbx_cfg *dcbx_cfg;
+	enum ice_status status;
+
+	/* Initialize pause params */
+	pause->rx_pause = 0;
+	pause->tx_pause = 0;
+
+	dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
+
+	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return;
+
+	/* Get current PHY config */
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
+				     NULL);
+	if (status)
+		goto out;
+
+	pause->autoneg = ice_is_phy_caps_an_enabled(pcaps) ? AUTONEG_ENABLE :
+							     AUTONEG_DISABLE;
+
+	if (dcbx_cfg->pfc.pfcena)
+		/* PFC enabled so report LFC as off */
+		goto out;
+
+	if (pcaps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE)
+		pause->tx_pause = 1;
+	if (pcaps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
+		pause->rx_pause = 1;
+
+out:
+	kfree(pcaps);
+}
+
+/**
+ * ice_set_pauseparam - Set Flow Control parameter
+ * @netdev: network interface device structure
+ * @pause: return Tx/Rx flow control status
+ */
+static int
+ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	struct ice_link_status *hw_link_info;
+	struct ice_pf *pf = np->vsi->back;
+	struct ice_dcbx_cfg *dcbx_cfg;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_port_info *pi;
+	enum ice_status status;
+	u8 aq_failures;
+	bool link_up;
+	int err = 0;
+	u32 is_an;
+
+	pi = vsi->port_info;
+	hw_link_info = &pi->phy.link_info;
+	dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
+	link_up = hw_link_info->link_info & ICE_AQ_LINK_UP;
+
+	/* Changing the port's flow control is not supported if this isn't the
+	 * PF VSI
+	 */
+	if (vsi->type != ICE_VSI_PF) {
+		netdev_info(netdev, "Changing flow control parameters only supported for PF VSI\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* Get pause param reports configured and negotiated flow control pause
+	 * when ETHTOOL_GLINKSETTINGS is defined. Since ETHTOOL_GLINKSETTINGS is
+	 * defined get pause param pause->autoneg reports SW configured setting,
+	 * so compare pause->autoneg with SW configured to prevent the user from
+	 * using set pause param to chance autoneg.
+	 */
+	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return -ENOMEM;
+
+	/* Get current PHY config */
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
+				     NULL);
+	if (status) {
+		kfree(pcaps);
+		return -EIO;
+	}
+
+	is_an = ice_is_phy_caps_an_enabled(pcaps) ? AUTONEG_ENABLE :
+						    AUTONEG_DISABLE;
+
+	kfree(pcaps);
+
+	if (pause->autoneg != is_an) {
+		netdev_info(netdev, "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* If we have link and don't have autoneg */
+	if (!test_bit(__ICE_DOWN, pf->state) &&
+	    !(hw_link_info->an_info & ICE_AQ_AN_COMPLETED)) {
+		/* Send message that it might not necessarily work*/
+		netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n");
+	}
+
+	if (dcbx_cfg->pfc.pfcena) {
+		netdev_info(netdev, "Priority flow control enabled. Cannot set link flow control.\n");
+		return -EOPNOTSUPP;
+	}
+	if (pause->rx_pause && pause->tx_pause)
+		pi->fc.req_mode = ICE_FC_FULL;
+	else if (pause->rx_pause && !pause->tx_pause)
+		pi->fc.req_mode = ICE_FC_RX_PAUSE;
+	else if (!pause->rx_pause && pause->tx_pause)
+		pi->fc.req_mode = ICE_FC_TX_PAUSE;
+	else if (!pause->rx_pause && !pause->tx_pause)
+		pi->fc.req_mode = ICE_FC_NONE;
+	else
+		return -EINVAL;
+
+	/* Set the FC mode and only restart AN if link is up */
+	status = ice_set_fc(pi, &aq_failures, link_up);
+
+	if (aq_failures & ICE_SET_FC_AQ_FAIL_GET) {
+		netdev_info(netdev, "Set fc failed on the get_phy_capabilities call with err %s aq_err %s\n",
+			    ice_stat_str(status),
+			    ice_aq_str(hw->adminq.sq_last_status));
+		err = -EAGAIN;
+	} else if (aq_failures & ICE_SET_FC_AQ_FAIL_SET) {
+		netdev_info(netdev, "Set fc failed on the set_phy_config call with err %s aq_err %s\n",
+			    ice_stat_str(status),
+			    ice_aq_str(hw->adminq.sq_last_status));
+		err = -EAGAIN;
+	} else if (aq_failures & ICE_SET_FC_AQ_FAIL_UPDATE) {
+		netdev_info(netdev, "Set fc failed on the get_link_info call with err %s aq_err %s\n",
+			    ice_stat_str(status),
+			    ice_aq_str(hw->adminq.sq_last_status));
+		err = -EAGAIN;
+	}
+
+	return err;
+}
+
+/**
+ * ice_get_rxfh_key_size - get the RSS hash key size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size.
+ */
+static u32 ice_get_rxfh_key_size(struct net_device __always_unused *netdev)
+{
+	return ICE_VSIQF_HKEY_ARRAY_SIZE;
+}
+
+/**
+ * ice_get_rxfh_indir_size - get the Rx flow hash indirection table size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size.
+ */
+static u32 ice_get_rxfh_indir_size(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+
+	return np->vsi->rss_table_size;
+}
+
+/**
+ * ice_get_rxfh - get the Rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function
+ *
+ * Reads the indirection table directly from the hardware.
+ */
+static int
+ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	int ret = 0, i;
+	u8 *lut;
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+
+	if (!indir)
+		return 0;
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+		/* RSS not supported return error here */
+		netdev_warn(netdev, "RSS is not configured on this VSI!\n");
+		return -EIO;
+	}
+
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	if (ice_get_rss(vsi, key, lut, vsi->rss_table_size)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	for (i = 0; i < vsi->rss_table_size; i++)
+		indir[i] = (u32)(lut[i]);
+
+out:
+	kfree(lut);
+	return ret;
+}
+
+/**
+ * ice_set_rxfh - set the Rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function
+ *
+ * Returns -EINVAL if the table specifies an invalid queue ID, otherwise
+ * returns 0 after programming the table.
+ */
+static int
+ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
+	     const u8 hfunc)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	u8 *seed = NULL;
+
+	dev = ice_pf_to_dev(pf);
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+		return -EOPNOTSUPP;
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+		/* RSS not supported return error here */
+		netdev_warn(netdev, "RSS is not configured on this VSI!\n");
+		return -EIO;
+	}
+
+	if (key) {
+		if (!vsi->rss_hkey_user) {
+			vsi->rss_hkey_user =
+				devm_kzalloc(dev, ICE_VSIQF_HKEY_ARRAY_SIZE,
+					     GFP_KERNEL);
+			if (!vsi->rss_hkey_user)
+				return -ENOMEM;
+		}
+		memcpy(vsi->rss_hkey_user, key, ICE_VSIQF_HKEY_ARRAY_SIZE);
+		seed = vsi->rss_hkey_user;
+	}
+
+	if (!vsi->rss_lut_user) {
+		vsi->rss_lut_user = devm_kzalloc(dev, vsi->rss_table_size,
+						 GFP_KERNEL);
+		if (!vsi->rss_lut_user)
+			return -ENOMEM;
+	}
+
+	/* Each 32 bits pointed by 'indir' is stored with a lut entry */
+	if (indir) {
+		int i;
+
+		for (i = 0; i < vsi->rss_table_size; i++)
+			vsi->rss_lut_user[i] = (u8)(indir[i]);
+	} else {
+		ice_fill_rss_lut(vsi->rss_lut_user, vsi->rss_table_size,
+				 vsi->rss_size);
+	}
+
+	if (ice_set_rss(vsi, seed, vsi->rss_lut_user, vsi->rss_table_size))
+		return -EIO;
+
+	return 0;
+}
+
+/**
+ * ice_get_max_txq - return the maximum number of Tx queues for in a PF
+ * @pf: PF structure
+ */
+static int ice_get_max_txq(struct ice_pf *pf)
+{
+	return min3(pf->num_lan_msix, (u16)num_online_cpus(),
+		    (u16)pf->hw.func_caps.common_cap.num_txq);
+}
+
+/**
+ * ice_get_max_rxq - return the maximum number of Rx queues for in a PF
+ * @pf: PF structure
+ */
+static int ice_get_max_rxq(struct ice_pf *pf)
+{
+	return min3(pf->num_lan_msix, (u16)num_online_cpus(),
+		    (u16)pf->hw.func_caps.common_cap.num_rxq);
+}
+
+/**
+ * ice_get_combined_cnt - return the current number of combined channels
+ * @vsi: PF VSI pointer
+ *
+ * Go through all queue vectors and count ones that have both Rx and Tx ring
+ * attached
+ */
+static u32 ice_get_combined_cnt(struct ice_vsi *vsi)
+{
+	u32 combined = 0;
+	int q_idx;
+
+	ice_for_each_q_vector(vsi, q_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+		if (q_vector->rx.ring && q_vector->tx.ring)
+			combined++;
+	}
+
+	return combined;
+}
+
+/**
+ * ice_get_channels - get the current and max supported channels
+ * @dev: network interface device structure
+ * @ch: ethtool channel data structure
+ */
+static void
+ice_get_channels(struct net_device *dev, struct ethtool_channels *ch)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+
+	/* report maximum channels */
+	ch->max_rx = ice_get_max_rxq(pf);
+	ch->max_tx = ice_get_max_txq(pf);
+	ch->max_combined = min_t(int, ch->max_rx, ch->max_tx);
+
+	/* report current channels */
+	ch->combined_count = ice_get_combined_cnt(vsi);
+	ch->rx_count = vsi->num_rxq - ch->combined_count;
+	ch->tx_count = vsi->num_txq - ch->combined_count;
+
+	/* report other queues */
+	ch->other_count = test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1 : 0;
+	ch->max_other = ch->other_count;
+}
+
+/**
+ * ice_get_valid_rss_size - return valid number of RSS queues
+ * @hw: pointer to the HW structure
+ * @new_size: requested RSS queues
+ */
+static int ice_get_valid_rss_size(struct ice_hw *hw, int new_size)
+{
+	struct ice_hw_common_caps *caps = &hw->func_caps.common_cap;
+
+	return min_t(int, new_size, BIT(caps->rss_table_entry_width));
+}
+
+/**
+ * ice_vsi_set_dflt_rss_lut - set default RSS LUT with requested RSS size
+ * @vsi: VSI to reconfigure RSS LUT on
+ * @req_rss_size: requested range of queue numbers for hashing
+ *
+ * Set the VSI's RSS parameters, configure the RSS LUT based on these.
+ */
+static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size)
+{
+	struct ice_pf *pf = vsi->back;
+	enum ice_status status;
+	struct device *dev;
+	struct ice_hw *hw;
+	int err = 0;
+	u8 *lut;
+
+	dev = ice_pf_to_dev(pf);
+	hw = &pf->hw;
+
+	if (!req_rss_size)
+		return -EINVAL;
+
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	/* set RSS LUT parameters */
+	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+		vsi->rss_size = 1;
+	else
+		vsi->rss_size = ice_get_valid_rss_size(hw, req_rss_size);
+
+	/* create/set RSS LUT */
+	ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
+	status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut,
+				    vsi->rss_table_size);
+	if (status) {
+		dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		err = -EIO;
+	}
+
+	kfree(lut);
+	return err;
+}
+
+/**
+ * ice_set_channels - set the number channels
+ * @dev: network interface device structure
+ * @ch: ethtool channel data structure
+ */
+static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	int new_rx = 0, new_tx = 0;
+	u32 curr_combined;
+
+	/* do not support changing channels in Safe Mode */
+	if (ice_is_safe_mode(pf)) {
+		netdev_err(dev, "Changing channel in Safe Mode is not supported\n");
+		return -EOPNOTSUPP;
+	}
+	/* do not support changing other_count */
+	if (ch->other_count != (test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1U : 0U))
+		return -EINVAL;
+
+	if (test_bit(ICE_FLAG_FD_ENA, pf->flags) && pf->hw.fdir_active_fltr) {
+		netdev_err(dev, "Cannot set channels when Flow Director filters are active\n");
+		return -EOPNOTSUPP;
+	}
+
+	curr_combined = ice_get_combined_cnt(vsi);
+
+	/* these checks are for cases where user didn't specify a particular
+	 * value on cmd line but we get non-zero value anyway via
+	 * get_channels(); look at ethtool.c in ethtool repository (the user
+	 * space part), particularly, do_schannels() routine
+	 */
+	if (ch->rx_count == vsi->num_rxq - curr_combined)
+		ch->rx_count = 0;
+	if (ch->tx_count == vsi->num_txq - curr_combined)
+		ch->tx_count = 0;
+	if (ch->combined_count == curr_combined)
+		ch->combined_count = 0;
+
+	if (!(ch->combined_count || (ch->rx_count && ch->tx_count))) {
+		netdev_err(dev, "Please specify at least 1 Rx and 1 Tx channel\n");
+		return -EINVAL;
+	}
+
+	new_rx = ch->combined_count + ch->rx_count;
+	new_tx = ch->combined_count + ch->tx_count;
+
+	if (new_rx > ice_get_max_rxq(pf)) {
+		netdev_err(dev, "Maximum allowed Rx channels is %d\n",
+			   ice_get_max_rxq(pf));
+		return -EINVAL;
+	}
+	if (new_tx > ice_get_max_txq(pf)) {
+		netdev_err(dev, "Maximum allowed Tx channels is %d\n",
+			   ice_get_max_txq(pf));
+		return -EINVAL;
+	}
+
+	ice_vsi_recfg_qs(vsi, new_rx, new_tx);
+
+	if (!netif_is_rxfh_configured(dev))
+		return ice_vsi_set_dflt_rss_lut(vsi, new_rx);
+
+	/* Update rss_size due to change in Rx queues */
+	vsi->rss_size = ice_get_valid_rss_size(&pf->hw, new_rx);
+
+	return 0;
+}
+
+/**
+ * ice_get_wol - get current Wake on LAN configuration
+ * @netdev: network interface device structure
+ * @wol: Ethtool structure to retrieve WoL settings
+ */
+static void ice_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	if (np->vsi->type != ICE_VSI_PF)
+		netdev_warn(netdev, "Wake on LAN is not supported on this interface!\n");
+
+	/* Get WoL settings based on the HW capability */
+	if (ice_is_wol_supported(&pf->hw)) {
+		wol->supported = WAKE_MAGIC;
+		wol->wolopts = pf->wol_ena ? WAKE_MAGIC : 0;
+	} else {
+		wol->supported = 0;
+		wol->wolopts = 0;
+	}
+}
+
+/**
+ * ice_set_wol - set Wake on LAN on supported device
+ * @netdev: network interface device structure
+ * @wol: Ethtool structure to set WoL
+ */
+static int ice_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+
+	if (vsi->type != ICE_VSI_PF || !ice_is_wol_supported(&pf->hw))
+		return -EOPNOTSUPP;
+
+	/* only magic packet is supported */
+	if (wol->wolopts && wol->wolopts != WAKE_MAGIC)
+		return -EOPNOTSUPP;
+
+	/* Set WoL only if there is a new value */
+	if (pf->wol_ena != !!wol->wolopts) {
+		pf->wol_ena = !!wol->wolopts;
+		device_set_wakeup_enable(ice_pf_to_dev(pf), pf->wol_ena);
+		netdev_dbg(netdev, "WoL magic packet %sabled\n",
+			   pf->wol_ena ? "en" : "dis");
+	}
+
+	return 0;
+}
+
+enum ice_container_type {
+	ICE_RX_CONTAINER,
+	ICE_TX_CONTAINER,
+};
+
+/**
+ * ice_get_rc_coalesce - get ITR values for specific ring container
+ * @ec: ethtool structure to fill with driver's coalesce settings
+ * @c_type: container type, Rx or Tx
+ * @rc: ring container that the ITR values will come from
+ *
+ * Query the device for ice_ring_container specific ITR values. This is
+ * done per ice_ring_container because each q_vector can have 1 or more rings
+ * and all of said ring(s) will have the same ITR values.
+ *
+ * Returns 0 on success, negative otherwise.
+ */
+static int
+ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type,
+		    struct ice_ring_container *rc)
+{
+	if (!rc->ring)
+		return -EINVAL;
+
+	switch (c_type) {
+	case ICE_RX_CONTAINER:
+		ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting);
+		ec->rx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC;
+		ec->rx_coalesce_usecs_high = rc->ring->q_vector->intrl;
+		break;
+	case ICE_TX_CONTAINER:
+		ec->use_adaptive_tx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting);
+		ec->tx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC;
+		break;
+	default:
+		dev_dbg(ice_pf_to_dev(rc->ring->vsi->back), "Invalid c_type %d\n", c_type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_q_coalesce - get a queue's ITR/INTRL (coalesce) settings
+ * @vsi: VSI associated to the queue for getting ITR/INTRL (coalesce) settings
+ * @ec: coalesce settings to program the device with
+ * @q_num: update ITR/INTRL (coalesce) settings for this queue number/index
+ *
+ * Return 0 on success, and negative under the following conditions:
+ * 1. Getting Tx or Rx ITR/INTRL (coalesce) settings failed.
+ * 2. The q_num passed in is not a valid number/index for Tx and Rx rings.
+ */
+static int
+ice_get_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num)
+{
+	if (q_num < vsi->num_rxq && q_num < vsi->num_txq) {
+		if (ice_get_rc_coalesce(ec, ICE_RX_CONTAINER,
+					&vsi->rx_rings[q_num]->q_vector->rx))
+			return -EINVAL;
+		if (ice_get_rc_coalesce(ec, ICE_TX_CONTAINER,
+					&vsi->tx_rings[q_num]->q_vector->tx))
+			return -EINVAL;
+	} else if (q_num < vsi->num_rxq) {
+		if (ice_get_rc_coalesce(ec, ICE_RX_CONTAINER,
+					&vsi->rx_rings[q_num]->q_vector->rx))
+			return -EINVAL;
+	} else if (q_num < vsi->num_txq) {
+		if (ice_get_rc_coalesce(ec, ICE_TX_CONTAINER,
+					&vsi->tx_rings[q_num]->q_vector->tx))
+			return -EINVAL;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * __ice_get_coalesce - get ITR/INTRL values for the device
+ * @netdev: pointer to the netdev associated with this query
+ * @ec: ethtool structure to fill with driver's coalesce settings
+ * @q_num: queue number to get the coalesce settings for
+ *
+ * If the caller passes in a negative q_num then we return coalesce settings
+ * based on queue number 0, else use the actual q_num passed in.
+ */
+static int
+__ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
+		   int q_num)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (q_num < 0)
+		q_num = 0;
+
+	if (ice_get_q_coalesce(vsi, ec, q_num))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int
+ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
+{
+	return __ice_get_coalesce(netdev, ec, -1);
+}
+
+static int
+ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num,
+		       struct ethtool_coalesce *ec)
+{
+	return __ice_get_coalesce(netdev, ec, q_num);
+}
+
+/**
+ * ice_set_rc_coalesce - set ITR values for specific ring container
+ * @c_type: container type, Rx or Tx
+ * @ec: ethtool structure from user to update ITR settings
+ * @rc: ring container that the ITR values will come from
+ * @vsi: VSI associated to the ring container
+ *
+ * Set specific ITR values. This is done per ice_ring_container because each
+ * q_vector can have 1 or more rings and all of said ring(s) will have the same
+ * ITR values.
+ *
+ * Returns 0 on success, negative otherwise.
+ */
+static int
+ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
+		    struct ice_ring_container *rc, struct ice_vsi *vsi)
+{
+	const char *c_type_str = (c_type == ICE_RX_CONTAINER) ? "rx" : "tx";
+	u32 use_adaptive_coalesce, coalesce_usecs;
+	struct ice_pf *pf = vsi->back;
+	u16 itr_setting;
+
+	if (!rc->ring)
+		return -EINVAL;
+
+	switch (c_type) {
+	case ICE_RX_CONTAINER:
+		if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL ||
+		    (ec->rx_coalesce_usecs_high &&
+		     ec->rx_coalesce_usecs_high < pf->hw.intrl_gran)) {
+			netdev_info(vsi->netdev, "Invalid value, %s-usecs-high valid values are 0 (disabled), %d-%d\n",
+				    c_type_str, pf->hw.intrl_gran,
+				    ICE_MAX_INTRL);
+			return -EINVAL;
+		}
+		if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) {
+			rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high;
+			wr32(&pf->hw, GLINT_RATE(rc->ring->q_vector->reg_idx),
+			     ice_intrl_usec_to_reg(ec->rx_coalesce_usecs_high,
+						   pf->hw.intrl_gran));
+		}
+
+		use_adaptive_coalesce = ec->use_adaptive_rx_coalesce;
+		coalesce_usecs = ec->rx_coalesce_usecs;
+
+		break;
+	case ICE_TX_CONTAINER:
+		use_adaptive_coalesce = ec->use_adaptive_tx_coalesce;
+		coalesce_usecs = ec->tx_coalesce_usecs;
+
+		break;
+	default:
+		dev_dbg(ice_pf_to_dev(pf), "Invalid container type %d\n",
+			c_type);
+		return -EINVAL;
+	}
+
+	itr_setting = rc->itr_setting & ~ICE_ITR_DYNAMIC;
+	if (coalesce_usecs != itr_setting && use_adaptive_coalesce) {
+		netdev_info(vsi->netdev, "%s interrupt throttling cannot be changed if adaptive-%s is enabled\n",
+			    c_type_str, c_type_str);
+		return -EINVAL;
+	}
+
+	if (coalesce_usecs > ICE_ITR_MAX) {
+		netdev_info(vsi->netdev, "Invalid value, %s-usecs range is 0-%d\n",
+			    c_type_str, ICE_ITR_MAX);
+		return -EINVAL;
+	}
+
+	if (use_adaptive_coalesce) {
+		rc->itr_setting |= ICE_ITR_DYNAMIC;
+	} else {
+		/* save the user set usecs */
+		rc->itr_setting = coalesce_usecs;
+		/* device ITR granularity is in 2 usec increments */
+		rc->target_itr = ITR_REG_ALIGN(rc->itr_setting);
+	}
+
+	return 0;
+}
+
+/**
+ * ice_set_q_coalesce - set a queue's ITR/INTRL (coalesce) settings
+ * @vsi: VSI associated to the queue that need updating
+ * @ec: coalesce settings to program the device with
+ * @q_num: update ITR/INTRL (coalesce) settings for this queue number/index
+ *
+ * Return 0 on success, and negative under the following conditions:
+ * 1. Setting Tx or Rx ITR/INTRL (coalesce) settings failed.
+ * 2. The q_num passed in is not a valid number/index for Tx and Rx rings.
+ */
+static int
+ice_set_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num)
+{
+	if (q_num < vsi->num_rxq && q_num < vsi->num_txq) {
+		if (ice_set_rc_coalesce(ICE_RX_CONTAINER, ec,
+					&vsi->rx_rings[q_num]->q_vector->rx,
+					vsi))
+			return -EINVAL;
+
+		if (ice_set_rc_coalesce(ICE_TX_CONTAINER, ec,
+					&vsi->tx_rings[q_num]->q_vector->tx,
+					vsi))
+			return -EINVAL;
+	} else if (q_num < vsi->num_rxq) {
+		if (ice_set_rc_coalesce(ICE_RX_CONTAINER, ec,
+					&vsi->rx_rings[q_num]->q_vector->rx,
+					vsi))
+			return -EINVAL;
+	} else if (q_num < vsi->num_txq) {
+		if (ice_set_rc_coalesce(ICE_TX_CONTAINER, ec,
+					&vsi->tx_rings[q_num]->q_vector->tx,
+					vsi))
+			return -EINVAL;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_print_if_odd_usecs - print message if user tries to set odd [tx|rx]-usecs
+ * @netdev: netdev used for print
+ * @itr_setting: previous user setting
+ * @use_adaptive_coalesce: if adaptive coalesce is enabled or being enabled
+ * @coalesce_usecs: requested value of [tx|rx]-usecs
+ * @c_type_str: either "rx" or "tx" to match user set field of [tx|rx]-usecs
+ */
+static void
+ice_print_if_odd_usecs(struct net_device *netdev, u16 itr_setting,
+		       u32 use_adaptive_coalesce, u32 coalesce_usecs,
+		       const char *c_type_str)
+{
+	if (use_adaptive_coalesce)
+		return;
+
+	itr_setting = ITR_TO_REG(itr_setting);
+
+	if (itr_setting != coalesce_usecs && (coalesce_usecs % 2))
+		netdev_info(netdev, "User set %s-usecs to %d, device only supports even values. Rounding down and attempting to set %s-usecs to %d\n",
+			    c_type_str, coalesce_usecs, c_type_str,
+			    ITR_REG_ALIGN(coalesce_usecs));
+}
+
+/**
+ * __ice_set_coalesce - set ITR/INTRL values for the device
+ * @netdev: pointer to the netdev associated with this query
+ * @ec: ethtool structure to fill with driver's coalesce settings
+ * @q_num: queue number to get the coalesce settings for
+ *
+ * If the caller passes in a negative q_num then we set the coalesce settings
+ * for all Tx/Rx queues, else use the actual q_num passed in.
+ */
+static int
+__ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
+		   int q_num)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (q_num < 0) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[0];
+		int v_idx;
+
+		if (q_vector) {
+			ice_print_if_odd_usecs(netdev, q_vector->rx.itr_setting,
+					       ec->use_adaptive_rx_coalesce,
+					       ec->rx_coalesce_usecs, "rx");
+
+			ice_print_if_odd_usecs(netdev, q_vector->tx.itr_setting,
+					       ec->use_adaptive_tx_coalesce,
+					       ec->tx_coalesce_usecs, "tx");
+		}
+
+		ice_for_each_q_vector(vsi, v_idx) {
+			/* In some cases if DCB is configured the num_[rx|tx]q
+			 * can be less than vsi->num_q_vectors. This check
+			 * accounts for that so we don't report a false failure
+			 */
+			if (v_idx >= vsi->num_rxq && v_idx >= vsi->num_txq)
+				goto set_complete;
+
+			if (ice_set_q_coalesce(vsi, ec, v_idx))
+				return -EINVAL;
+		}
+		goto set_complete;
+	}
+
+	if (ice_set_q_coalesce(vsi, ec, q_num))
+		return -EINVAL;
+
+set_complete:
+
+	return 0;
+}
+
+static int
+ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
+{
+	return __ice_set_coalesce(netdev, ec, -1);
+}
+
+static int
+ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num,
+		       struct ethtool_coalesce *ec)
+{
+	return __ice_set_coalesce(netdev, ec, q_num);
+}
+
+#define ICE_I2C_EEPROM_DEV_ADDR		0xA0
+#define ICE_I2C_EEPROM_DEV_ADDR2	0xA2
+#define ICE_MODULE_TYPE_SFP		0x03
+#define ICE_MODULE_TYPE_QSFP_PLUS	0x0D
+#define ICE_MODULE_TYPE_QSFP28		0x11
+#define ICE_MODULE_SFF_ADDR_MODE	0x04
+#define ICE_MODULE_SFF_DIAG_CAPAB	0x40
+#define ICE_MODULE_REVISION_ADDR	0x01
+#define ICE_MODULE_SFF_8472_COMP	0x5E
+#define ICE_MODULE_SFF_8472_SWAP	0x5C
+#define ICE_MODULE_QSFP_MAX_LEN		640
+
+/**
+ * ice_get_module_info - get SFF module type and revision information
+ * @netdev: network interface device structure
+ * @modinfo: module EEPROM size and layout information structure
+ */
+static int
+ice_get_module_info(struct net_device *netdev,
+		    struct ethtool_modinfo *modinfo)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	u8 sff8472_comp = 0;
+	u8 sff8472_swap = 0;
+	u8 sff8636_rev = 0;
+	u8 value = 0;
+
+	status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, 0x00, 0x00,
+				   0, &value, 1, 0, NULL);
+	if (status)
+		return -EIO;
+
+	switch (value) {
+	case ICE_MODULE_TYPE_SFP:
+		status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+					   ICE_MODULE_SFF_8472_COMP, 0x00, 0,
+					   &sff8472_comp, 1, 0, NULL);
+		if (status)
+			return -EIO;
+		status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+					   ICE_MODULE_SFF_8472_SWAP, 0x00, 0,
+					   &sff8472_swap, 1, 0, NULL);
+		if (status)
+			return -EIO;
+
+		if (sff8472_swap & ICE_MODULE_SFF_ADDR_MODE) {
+			modinfo->type = ETH_MODULE_SFF_8079;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		} else if (sff8472_comp &&
+			   (sff8472_swap & ICE_MODULE_SFF_DIAG_CAPAB)) {
+			modinfo->type = ETH_MODULE_SFF_8472;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+		} else {
+			modinfo->type = ETH_MODULE_SFF_8079;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		}
+		break;
+	case ICE_MODULE_TYPE_QSFP_PLUS:
+	case ICE_MODULE_TYPE_QSFP28:
+		status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+					   ICE_MODULE_REVISION_ADDR, 0x00, 0,
+					   &sff8636_rev, 1, 0, NULL);
+		if (status)
+			return -EIO;
+		/* Check revision compliance */
+		if (sff8636_rev > 0x02) {
+			/* Module is SFF-8636 compliant */
+			modinfo->type = ETH_MODULE_SFF_8636;
+			modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN;
+		} else {
+			modinfo->type = ETH_MODULE_SFF_8436;
+			modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN;
+		}
+		break;
+	default:
+		netdev_warn(netdev, "SFF Module Type not recognized.\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * ice_get_module_eeprom - fill buffer with SFF EEPROM contents
+ * @netdev: network interface device structure
+ * @ee: EEPROM dump request structure
+ * @data: buffer to be filled with EEPROM contents
+ */
+static int
+ice_get_module_eeprom(struct net_device *netdev,
+		      struct ethtool_eeprom *ee, u8 *data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	u8 addr = ICE_I2C_EEPROM_DEV_ADDR;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	bool is_sfp = false;
+	unsigned int i;
+	u16 offset = 0;
+	u8 value = 0;
+	u8 page = 0;
+
+	status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0,
+				   &value, 1, 0, NULL);
+	if (status)
+		return -EIO;
+
+	if (!ee || !ee->len || !data)
+		return -EINVAL;
+
+	if (value == ICE_MODULE_TYPE_SFP)
+		is_sfp = true;
+
+	for (i = 0; i < ee->len; i++) {
+		offset = i + ee->offset;
+
+		/* Check if we need to access the other memory page */
+		if (is_sfp) {
+			if (offset >= ETH_MODULE_SFF_8079_LEN) {
+				offset -= ETH_MODULE_SFF_8079_LEN;
+				addr = ICE_I2C_EEPROM_DEV_ADDR2;
+			}
+		} else {
+			while (offset >= ETH_MODULE_SFF_8436_LEN) {
+				/* Compute memory page number and offset. */
+				offset -= ETH_MODULE_SFF_8436_LEN / 2;
+				page++;
+			}
+		}
+
+		status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, !is_sfp,
+					   &value, 1, 0, NULL);
+		if (status)
+			value = 0;
+		data[i] = value;
+	}
+	return 0;
+}
+
+static const struct ethtool_ops ice_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+				     ETHTOOL_COALESCE_USE_ADAPTIVE |
+				     ETHTOOL_COALESCE_RX_USECS_HIGH,
+	.get_link_ksettings	= ice_get_link_ksettings,
+	.set_link_ksettings	= ice_set_link_ksettings,
+	.get_drvinfo		= ice_get_drvinfo,
+	.get_regs_len		= ice_get_regs_len,
+	.get_regs		= ice_get_regs,
+	.get_wol		= ice_get_wol,
+	.set_wol		= ice_set_wol,
+	.get_msglevel		= ice_get_msglevel,
+	.set_msglevel		= ice_set_msglevel,
+	.self_test		= ice_self_test,
+	.get_link		= ethtool_op_get_link,
+	.get_eeprom_len		= ice_get_eeprom_len,
+	.get_eeprom		= ice_get_eeprom,
+	.get_coalesce		= ice_get_coalesce,
+	.set_coalesce		= ice_set_coalesce,
+	.get_strings		= ice_get_strings,
+	.set_phys_id		= ice_set_phys_id,
+	.get_ethtool_stats      = ice_get_ethtool_stats,
+	.get_priv_flags		= ice_get_priv_flags,
+	.set_priv_flags		= ice_set_priv_flags,
+	.get_sset_count		= ice_get_sset_count,
+	.get_rxnfc		= ice_get_rxnfc,
+	.set_rxnfc		= ice_set_rxnfc,
+	.get_ringparam		= ice_get_ringparam,
+	.set_ringparam		= ice_set_ringparam,
+	.nway_reset		= ice_nway_reset,
+	.get_pauseparam		= ice_get_pauseparam,
+	.set_pauseparam		= ice_set_pauseparam,
+	.get_rxfh_key_size	= ice_get_rxfh_key_size,
+	.get_rxfh_indir_size	= ice_get_rxfh_indir_size,
+	.get_rxfh		= ice_get_rxfh,
+	.set_rxfh		= ice_set_rxfh,
+	.get_channels		= ice_get_channels,
+	.set_channels		= ice_set_channels,
+	.get_ts_info		= ethtool_op_get_ts_info,
+	.get_per_queue_coalesce	= ice_get_per_q_coalesce,
+	.set_per_queue_coalesce	= ice_set_per_q_coalesce,
+	.get_fecparam		= ice_get_fecparam,
+	.set_fecparam		= ice_set_fecparam,
+	.get_module_info	= ice_get_module_info,
+	.get_module_eeprom	= ice_get_module_eeprom,
+};
+
+static const struct ethtool_ops ice_ethtool_safe_mode_ops = {
+	.get_link_ksettings	= ice_get_link_ksettings,
+	.set_link_ksettings	= ice_set_link_ksettings,
+	.get_drvinfo		= ice_get_drvinfo,
+	.get_regs_len		= ice_get_regs_len,
+	.get_regs		= ice_get_regs,
+	.get_wol		= ice_get_wol,
+	.set_wol		= ice_set_wol,
+	.get_msglevel		= ice_get_msglevel,
+	.set_msglevel		= ice_set_msglevel,
+	.get_link		= ethtool_op_get_link,
+	.get_eeprom_len		= ice_get_eeprom_len,
+	.get_eeprom		= ice_get_eeprom,
+	.get_strings		= ice_get_strings,
+	.get_ethtool_stats	= ice_get_ethtool_stats,
+	.get_sset_count		= ice_get_sset_count,
+	.get_ringparam		= ice_get_ringparam,
+	.set_ringparam		= ice_set_ringparam,
+	.nway_reset		= ice_nway_reset,
+	.get_channels		= ice_get_channels,
+};
+
+/**
+ * ice_set_ethtool_safe_mode_ops - setup safe mode ethtool ops
+ * @netdev: network interface device structure
+ */
+void ice_set_ethtool_safe_mode_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &ice_ethtool_safe_mode_ops;
+}
+
+/**
+ * ice_set_ethtool_ops - setup netdev ethtool ops
+ * @netdev: network interface device structure
+ *
+ * setup netdev ethtool ops with ice specific ops
+ */
+void ice_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &ice_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
new file mode 100644
index 000000000..a122a267e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -0,0 +1,1703 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+/* flow director ethtool support for ice */
+
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_flow.h"
+
+static struct in6_addr full_ipv6_addr_mask = {
+	.in6_u = {
+		.u6_addr8 = {
+			0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+			0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+		}
+	}
+};
+
+static struct in6_addr zero_ipv6_addr_mask = {
+	.in6_u = {
+		.u6_addr8 = {
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		}
+	}
+};
+
+/* calls to ice_flow_add_prof require the number of segments in the array
+ * for segs_cnt. In this code that is one more than the index.
+ */
+#define TNL_SEG_CNT(_TNL_) ((_TNL_) + 1)
+
+/**
+ * ice_fltr_to_ethtool_flow - convert filter type values to ethtool
+ * flow type values
+ * @flow: filter type to be converted
+ *
+ * Returns the corresponding ethtool flow type.
+ */
+static int ice_fltr_to_ethtool_flow(enum ice_fltr_ptype flow)
+{
+	switch (flow) {
+	case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+		return TCP_V4_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+		return UDP_V4_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
+		return SCTP_V4_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
+		return IPV4_USER_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+		return TCP_V6_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+		return UDP_V6_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
+		return SCTP_V6_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
+		return IPV6_USER_FLOW;
+	default:
+		/* 0 is undefined ethtool flow */
+		return 0;
+	}
+}
+
+/**
+ * ice_ethtool_flow_to_fltr - convert ethtool flow type to filter enum
+ * @eth: Ethtool flow type to be converted
+ *
+ * Returns flow enum
+ */
+static enum ice_fltr_ptype ice_ethtool_flow_to_fltr(int eth)
+{
+	switch (eth) {
+	case TCP_V4_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV4_TCP;
+	case UDP_V4_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV4_UDP;
+	case SCTP_V4_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV4_SCTP;
+	case IPV4_USER_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV4_OTHER;
+	case TCP_V6_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV6_TCP;
+	case UDP_V6_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV6_UDP;
+	case SCTP_V6_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV6_SCTP;
+	case IPV6_USER_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV6_OTHER;
+	default:
+		return ICE_FLTR_PTYPE_NONF_NONE;
+	}
+}
+
+/**
+ * ice_is_mask_valid - check mask field set
+ * @mask: full mask to check
+ * @field: field for which mask should be valid
+ *
+ * If the mask is fully set return true. If it is not valid for field return
+ * false.
+ */
+static bool ice_is_mask_valid(u64 mask, u64 field)
+{
+	return (mask & field) == field;
+}
+
+/**
+ * ice_get_ethtool_fdir_entry - fill ethtool structure with fdir filter data
+ * @hw: hardware structure that contains filter list
+ * @cmd: ethtool command data structure to receive the filter data
+ *
+ * Returns 0 on success and -EINVAL on failure
+ */
+int ice_get_ethtool_fdir_entry(struct ice_hw *hw, struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp;
+	struct ice_fdir_fltr *rule;
+	int ret = 0;
+	u16 idx;
+
+	fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+	mutex_lock(&hw->fdir_fltr_lock);
+
+	rule = ice_fdir_find_fltr_by_idx(hw, fsp->location);
+
+	if (!rule || fsp->location != rule->fltr_id) {
+		ret = -EINVAL;
+		goto release_lock;
+	}
+
+	fsp->flow_type = ice_fltr_to_ethtool_flow(rule->flow_type);
+
+	memset(&fsp->m_u, 0, sizeof(fsp->m_u));
+	memset(&fsp->m_ext, 0, sizeof(fsp->m_ext));
+
+	switch (fsp->flow_type) {
+	case IPV4_USER_FLOW:
+		fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+		fsp->h_u.usr_ip4_spec.proto = 0;
+		fsp->h_u.usr_ip4_spec.l4_4_bytes = rule->ip.v4.l4_header;
+		fsp->h_u.usr_ip4_spec.tos = rule->ip.v4.tos;
+		fsp->h_u.usr_ip4_spec.ip4src = rule->ip.v4.src_ip;
+		fsp->h_u.usr_ip4_spec.ip4dst = rule->ip.v4.dst_ip;
+		fsp->m_u.usr_ip4_spec.ip4src = rule->mask.v4.src_ip;
+		fsp->m_u.usr_ip4_spec.ip4dst = rule->mask.v4.dst_ip;
+		fsp->m_u.usr_ip4_spec.ip_ver = 0xFF;
+		fsp->m_u.usr_ip4_spec.proto = 0;
+		fsp->m_u.usr_ip4_spec.l4_4_bytes = rule->mask.v4.l4_header;
+		fsp->m_u.usr_ip4_spec.tos = rule->mask.v4.tos;
+		break;
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case SCTP_V4_FLOW:
+		fsp->h_u.tcp_ip4_spec.psrc = rule->ip.v4.src_port;
+		fsp->h_u.tcp_ip4_spec.pdst = rule->ip.v4.dst_port;
+		fsp->h_u.tcp_ip4_spec.ip4src = rule->ip.v4.src_ip;
+		fsp->h_u.tcp_ip4_spec.ip4dst = rule->ip.v4.dst_ip;
+		fsp->m_u.tcp_ip4_spec.psrc = rule->mask.v4.src_port;
+		fsp->m_u.tcp_ip4_spec.pdst = rule->mask.v4.dst_port;
+		fsp->m_u.tcp_ip4_spec.ip4src = rule->mask.v4.src_ip;
+		fsp->m_u.tcp_ip4_spec.ip4dst = rule->mask.v4.dst_ip;
+		break;
+	case IPV6_USER_FLOW:
+		fsp->h_u.usr_ip6_spec.l4_4_bytes = rule->ip.v6.l4_header;
+		fsp->h_u.usr_ip6_spec.tclass = rule->ip.v6.tc;
+		fsp->h_u.usr_ip6_spec.l4_proto = rule->ip.v6.proto;
+		memcpy(fsp->h_u.tcp_ip6_spec.ip6src, rule->ip.v6.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->h_u.tcp_ip6_spec.ip6dst, rule->ip.v6.dst_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.tcp_ip6_spec.ip6src, rule->mask.v6.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.tcp_ip6_spec.ip6dst, rule->mask.v6.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->m_u.usr_ip6_spec.l4_4_bytes = rule->mask.v6.l4_header;
+		fsp->m_u.usr_ip6_spec.tclass = rule->mask.v6.tc;
+		fsp->m_u.usr_ip6_spec.l4_proto = rule->mask.v6.proto;
+		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		memcpy(fsp->h_u.tcp_ip6_spec.ip6src, rule->ip.v6.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->h_u.tcp_ip6_spec.ip6dst, rule->ip.v6.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->h_u.tcp_ip6_spec.psrc = rule->ip.v6.src_port;
+		fsp->h_u.tcp_ip6_spec.pdst = rule->ip.v6.dst_port;
+		memcpy(fsp->m_u.tcp_ip6_spec.ip6src,
+		       rule->mask.v6.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.tcp_ip6_spec.ip6dst,
+		       rule->mask.v6.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->m_u.tcp_ip6_spec.psrc = rule->mask.v6.src_port;
+		fsp->m_u.tcp_ip6_spec.pdst = rule->mask.v6.dst_port;
+		fsp->h_u.tcp_ip6_spec.tclass = rule->ip.v6.tc;
+		fsp->m_u.tcp_ip6_spec.tclass = rule->mask.v6.tc;
+		break;
+	default:
+		break;
+	}
+
+	if (rule->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DROP_PKT)
+		fsp->ring_cookie = RX_CLS_FLOW_DISC;
+	else
+		fsp->ring_cookie = rule->q_index;
+
+	idx = ice_ethtool_flow_to_fltr(fsp->flow_type);
+	if (idx == ICE_FLTR_PTYPE_NONF_NONE) {
+		dev_err(ice_hw_to_dev(hw), "Missing input index for flow_type %d\n",
+			rule->flow_type);
+		ret = -EINVAL;
+	}
+
+release_lock:
+	mutex_unlock(&hw->fdir_fltr_lock);
+	return ret;
+}
+
+/**
+ * ice_get_fdir_fltr_ids - fill buffer with filter IDs of active filters
+ * @hw: hardware structure containing the filter list
+ * @cmd: ethtool command data structure
+ * @rule_locs: ethtool array passed in from OS to receive filter IDs
+ *
+ * Returns 0 as expected for success by ethtool
+ */
+int
+ice_get_fdir_fltr_ids(struct ice_hw *hw, struct ethtool_rxnfc *cmd,
+		      u32 *rule_locs)
+{
+	struct ice_fdir_fltr *f_rule;
+	unsigned int cnt = 0;
+	int val = 0;
+
+	/* report total rule count */
+	cmd->data = ice_get_fdir_cnt_all(hw);
+
+	mutex_lock(&hw->fdir_fltr_lock);
+
+	list_for_each_entry(f_rule, &hw->fdir_list_head, fltr_node) {
+		if (cnt == cmd->rule_cnt) {
+			val = -EMSGSIZE;
+			goto release_lock;
+		}
+		rule_locs[cnt] = f_rule->fltr_id;
+		cnt++;
+	}
+
+release_lock:
+	mutex_unlock(&hw->fdir_fltr_lock);
+	if (!val)
+		cmd->rule_cnt = cnt;
+	return val;
+}
+
+/**
+ * ice_fdir_get_hw_prof - return the ice_fd_hw_proc associated with a flow
+ * @hw: hardware structure containing the filter list
+ * @blk: hardware block
+ * @flow: FDir flow type to release
+ */
+static struct ice_fd_hw_prof *
+ice_fdir_get_hw_prof(struct ice_hw *hw, enum ice_block blk, int flow)
+{
+	if (blk == ICE_BLK_FD && hw->fdir_prof)
+		return hw->fdir_prof[flow];
+
+	return NULL;
+}
+
+/**
+ * ice_fdir_erase_flow_from_hw - remove a flow from the HW profile tables
+ * @hw: hardware structure containing the filter list
+ * @blk: hardware block
+ * @flow: FDir flow type to release
+ */
+static void
+ice_fdir_erase_flow_from_hw(struct ice_hw *hw, enum ice_block blk, int flow)
+{
+	struct ice_fd_hw_prof *prof = ice_fdir_get_hw_prof(hw, blk, flow);
+	int tun;
+
+	if (!prof)
+		return;
+
+	for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+		u64 prof_id;
+		int j;
+
+		prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
+		for (j = 0; j < prof->cnt; j++) {
+			u16 vsi_num;
+
+			if (!prof->entry_h[j][tun] || !prof->vsi_h[j])
+				continue;
+			vsi_num = ice_get_hw_vsi_num(hw, prof->vsi_h[j]);
+			ice_rem_prof_id_flow(hw, blk, vsi_num, prof_id);
+			ice_flow_rem_entry(hw, blk, prof->entry_h[j][tun]);
+			prof->entry_h[j][tun] = 0;
+		}
+		ice_flow_rem_prof(hw, blk, prof_id);
+	}
+}
+
+/**
+ * ice_fdir_rem_flow - release the ice_flow structures for a filter type
+ * @hw: hardware structure containing the filter list
+ * @blk: hardware block
+ * @flow_type: FDir flow type to release
+ */
+static void
+ice_fdir_rem_flow(struct ice_hw *hw, enum ice_block blk,
+		  enum ice_fltr_ptype flow_type)
+{
+	int flow = (int)flow_type & ~FLOW_EXT;
+	struct ice_fd_hw_prof *prof;
+	int tun, i;
+
+	prof = ice_fdir_get_hw_prof(hw, blk, flow);
+	if (!prof)
+		return;
+
+	ice_fdir_erase_flow_from_hw(hw, blk, flow);
+	for (i = 0; i < prof->cnt; i++)
+		prof->vsi_h[i] = 0;
+	for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+		if (!prof->fdir_seg[tun])
+			continue;
+		devm_kfree(ice_hw_to_dev(hw), prof->fdir_seg[tun]);
+		prof->fdir_seg[tun] = NULL;
+	}
+	prof->cnt = 0;
+}
+
+/**
+ * ice_fdir_release_flows - release all flows in use for later replay
+ * @hw: pointer to HW instance
+ */
+void ice_fdir_release_flows(struct ice_hw *hw)
+{
+	int flow;
+
+	/* release Flow Director HW table entries */
+	for (flow = 0; flow < ICE_FLTR_PTYPE_MAX; flow++)
+		ice_fdir_erase_flow_from_hw(hw, ICE_BLK_FD, flow);
+}
+
+/**
+ * ice_fdir_replay_flows - replay HW Flow Director filter info
+ * @hw: pointer to HW instance
+ */
+void ice_fdir_replay_flows(struct ice_hw *hw)
+{
+	int flow;
+
+	for (flow = 0; flow < ICE_FLTR_PTYPE_MAX; flow++) {
+		int tun;
+
+		if (!hw->fdir_prof[flow] || !hw->fdir_prof[flow]->cnt)
+			continue;
+		for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+			struct ice_flow_prof *hw_prof;
+			struct ice_fd_hw_prof *prof;
+			u64 prof_id;
+			int j;
+
+			prof = hw->fdir_prof[flow];
+			prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
+			ice_flow_add_prof(hw, ICE_BLK_FD, ICE_FLOW_RX, prof_id,
+					  prof->fdir_seg[tun], TNL_SEG_CNT(tun),
+					  &hw_prof);
+			for (j = 0; j < prof->cnt; j++) {
+				enum ice_flow_priority prio;
+				u64 entry_h = 0;
+				int err;
+
+				prio = ICE_FLOW_PRIO_NORMAL;
+				err = ice_flow_add_entry(hw, ICE_BLK_FD,
+							 prof_id,
+							 prof->vsi_h[0],
+							 prof->vsi_h[j],
+							 prio, prof->fdir_seg,
+							 &entry_h);
+				if (err) {
+					dev_err(ice_hw_to_dev(hw), "Could not replay Flow Director, flow type %d\n",
+						flow);
+					continue;
+				}
+				prof->entry_h[j][tun] = entry_h;
+			}
+		}
+	}
+}
+
+/**
+ * ice_parse_rx_flow_user_data - deconstruct user-defined data
+ * @fsp: pointer to ethtool Rx flow specification
+ * @data: pointer to userdef data structure for storage
+ *
+ * Returns 0 on success, negative error value on failure
+ */
+static int
+ice_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp,
+			    struct ice_rx_flow_userdef *data)
+{
+	u64 value, mask;
+
+	memset(data, 0, sizeof(*data));
+	if (!(fsp->flow_type & FLOW_EXT))
+		return 0;
+
+	value = be64_to_cpu(*((__force __be64 *)fsp->h_ext.data));
+	mask = be64_to_cpu(*((__force __be64 *)fsp->m_ext.data));
+	if (!mask)
+		return 0;
+
+#define ICE_USERDEF_FLEX_WORD_M	GENMASK_ULL(15, 0)
+#define ICE_USERDEF_FLEX_OFFS_S	16
+#define ICE_USERDEF_FLEX_OFFS_M	GENMASK_ULL(31, ICE_USERDEF_FLEX_OFFS_S)
+#define ICE_USERDEF_FLEX_FLTR_M	GENMASK_ULL(31, 0)
+
+	/* 0x1fe is the maximum value for offsets stored in the internal
+	 * filtering tables.
+	 */
+#define ICE_USERDEF_FLEX_MAX_OFFS_VAL 0x1fe
+
+	if (!ice_is_mask_valid(mask, ICE_USERDEF_FLEX_FLTR_M) ||
+	    value > ICE_USERDEF_FLEX_FLTR_M)
+		return -EINVAL;
+
+	data->flex_word = value & ICE_USERDEF_FLEX_WORD_M;
+	data->flex_offset = (value & ICE_USERDEF_FLEX_OFFS_M) >>
+			     ICE_USERDEF_FLEX_OFFS_S;
+	if (data->flex_offset > ICE_USERDEF_FLEX_MAX_OFFS_VAL)
+		return -EINVAL;
+
+	data->flex_fltr = true;
+
+	return 0;
+}
+
+/**
+ * ice_fdir_num_avail_fltr - return the number of unused flow director filters
+ * @hw: pointer to hardware structure
+ * @vsi: software VSI structure
+ *
+ * There are 2 filter pools: guaranteed and best effort(shared). Each VSI can
+ * use filters from either pool. The guaranteed pool is divided between VSIs.
+ * The best effort filter pool is common to all VSIs and is a device shared
+ * resource pool. The number of filters available to this VSI is the sum of
+ * the VSIs guaranteed filter pool and the global available best effort
+ * filter pool.
+ *
+ * Returns the number of available flow director filters to this VSI
+ */
+static int ice_fdir_num_avail_fltr(struct ice_hw *hw, struct ice_vsi *vsi)
+{
+	u16 vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
+	u16 num_guar;
+	u16 num_be;
+
+	/* total guaranteed filters assigned to this VSI */
+	num_guar = vsi->num_gfltr;
+
+	/* minus the guaranteed filters programed by this VSI */
+	num_guar -= (rd32(hw, VSIQF_FD_CNT(vsi_num)) &
+		     VSIQF_FD_CNT_FD_GCNT_M) >> VSIQF_FD_CNT_FD_GCNT_S;
+
+	/* total global best effort filters */
+	num_be = hw->func_caps.fd_fltr_best_effort;
+
+	/* minus the global best effort filters programmed */
+	num_be -= (rd32(hw, GLQF_FD_CNT) & GLQF_FD_CNT_FD_BCNT_M) >>
+		   GLQF_FD_CNT_FD_BCNT_S;
+
+	return num_guar + num_be;
+}
+
+/**
+ * ice_fdir_alloc_flow_prof - allocate FDir flow profile structure(s)
+ * @hw: HW structure containing the FDir flow profile structure(s)
+ * @flow: flow type to allocate the flow profile for
+ *
+ * Allocate the fdir_prof and fdir_prof[flow] if not already created. Return 0
+ * on success and negative on error.
+ */
+static int
+ice_fdir_alloc_flow_prof(struct ice_hw *hw, enum ice_fltr_ptype flow)
+{
+	if (!hw)
+		return -EINVAL;
+
+	if (!hw->fdir_prof) {
+		hw->fdir_prof = devm_kcalloc(ice_hw_to_dev(hw),
+					     ICE_FLTR_PTYPE_MAX,
+					     sizeof(*hw->fdir_prof),
+					     GFP_KERNEL);
+		if (!hw->fdir_prof)
+			return -ENOMEM;
+	}
+
+	if (!hw->fdir_prof[flow]) {
+		hw->fdir_prof[flow] = devm_kzalloc(ice_hw_to_dev(hw),
+						   sizeof(**hw->fdir_prof),
+						   GFP_KERNEL);
+		if (!hw->fdir_prof[flow])
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_fdir_set_hw_fltr_rule - Configure HW tables to generate a FDir rule
+ * @pf: pointer to the PF structure
+ * @seg: protocol header description pointer
+ * @flow: filter enum
+ * @tun: FDir segment to program
+ */
+static int
+ice_fdir_set_hw_fltr_rule(struct ice_pf *pf, struct ice_flow_seg_info *seg,
+			  enum ice_fltr_ptype flow, enum ice_fd_hw_seg tun)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_vsi *main_vsi, *ctrl_vsi;
+	struct ice_flow_seg_info *old_seg;
+	struct ice_flow_prof *prof = NULL;
+	struct ice_fd_hw_prof *hw_prof;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	u64 entry1_h = 0;
+	u64 entry2_h = 0;
+	u64 prof_id;
+	int err;
+
+	main_vsi = ice_get_main_vsi(pf);
+	if (!main_vsi)
+		return -EINVAL;
+
+	ctrl_vsi = ice_get_ctrl_vsi(pf);
+	if (!ctrl_vsi)
+		return -EINVAL;
+
+	err = ice_fdir_alloc_flow_prof(hw, flow);
+	if (err)
+		return err;
+
+	hw_prof = hw->fdir_prof[flow];
+	old_seg = hw_prof->fdir_seg[tun];
+	if (old_seg) {
+		/* This flow_type already has a changed input set.
+		 * If it matches the requested input set then we are
+		 * done. Or, if it's different then it's an error.
+		 */
+		if (!memcmp(old_seg, seg, sizeof(*seg)))
+			return -EEXIST;
+
+		/* if there are FDir filters using this flow,
+		 * then return error.
+		 */
+		if (hw->fdir_fltr_cnt[flow]) {
+			dev_err(dev, "Failed to add filter.  Flow director filters on each port must have the same input set.\n");
+			return -EINVAL;
+		}
+
+		if (ice_is_arfs_using_perfect_flow(hw, flow)) {
+			dev_err(dev, "aRFS using perfect flow type %d, cannot change input set\n",
+				flow);
+			return -EINVAL;
+		}
+
+		/* remove HW filter definition */
+		ice_fdir_rem_flow(hw, ICE_BLK_FD, flow);
+	}
+
+	/* Adding a profile, but there is only one header supported.
+	 * That is the final parameters are 1 header (segment), no
+	 * actions (NULL) and zero actions 0.
+	 */
+	prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
+	status = ice_flow_add_prof(hw, ICE_BLK_FD, ICE_FLOW_RX, prof_id, seg,
+				   TNL_SEG_CNT(tun), &prof);
+	if (status)
+		return ice_status_to_errno(status);
+	status = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, main_vsi->idx,
+				    main_vsi->idx, ICE_FLOW_PRIO_NORMAL,
+				    seg, &entry1_h);
+	if (status) {
+		err = ice_status_to_errno(status);
+		goto err_prof;
+	}
+	status = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, main_vsi->idx,
+				    ctrl_vsi->idx, ICE_FLOW_PRIO_NORMAL,
+				    seg, &entry2_h);
+	if (status) {
+		err = ice_status_to_errno(status);
+		goto err_entry;
+	}
+
+	hw_prof->fdir_seg[tun] = seg;
+	hw_prof->entry_h[0][tun] = entry1_h;
+	hw_prof->entry_h[1][tun] = entry2_h;
+	hw_prof->vsi_h[0] = main_vsi->idx;
+	hw_prof->vsi_h[1] = ctrl_vsi->idx;
+	if (!hw_prof->cnt)
+		hw_prof->cnt = 2;
+
+	return 0;
+
+err_entry:
+	ice_rem_prof_id_flow(hw, ICE_BLK_FD,
+			     ice_get_hw_vsi_num(hw, main_vsi->idx), prof_id);
+	ice_flow_rem_entry(hw, ICE_BLK_FD, entry1_h);
+err_prof:
+	ice_flow_rem_prof(hw, ICE_BLK_FD, prof_id);
+	dev_err(dev, "Failed to add filter.  Flow director filters on each port must have the same input set.\n");
+
+	return err;
+}
+
+/**
+ * ice_set_init_fdir_seg
+ * @seg: flow segment for programming
+ * @l3_proto: ICE_FLOW_SEG_HDR_IPV4 or ICE_FLOW_SEG_HDR_IPV6
+ * @l4_proto: ICE_FLOW_SEG_HDR_TCP or ICE_FLOW_SEG_HDR_UDP
+ *
+ * Set the configuration for perfect filters to the provided flow segment for
+ * programming the HW filter. This is to be called only when initializing
+ * filters as this function it assumes no filters exist.
+ */
+static int
+ice_set_init_fdir_seg(struct ice_flow_seg_info *seg,
+		      enum ice_flow_seg_hdr l3_proto,
+		      enum ice_flow_seg_hdr l4_proto)
+{
+	enum ice_flow_field src_addr, dst_addr, src_port, dst_port;
+
+	if (!seg)
+		return -EINVAL;
+
+	if (l3_proto == ICE_FLOW_SEG_HDR_IPV4) {
+		src_addr = ICE_FLOW_FIELD_IDX_IPV4_SA;
+		dst_addr = ICE_FLOW_FIELD_IDX_IPV4_DA;
+	} else if (l3_proto == ICE_FLOW_SEG_HDR_IPV6) {
+		src_addr = ICE_FLOW_FIELD_IDX_IPV6_SA;
+		dst_addr = ICE_FLOW_FIELD_IDX_IPV6_DA;
+	} else {
+		return -EINVAL;
+	}
+
+	if (l4_proto == ICE_FLOW_SEG_HDR_TCP) {
+		src_port = ICE_FLOW_FIELD_IDX_TCP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_TCP_DST_PORT;
+	} else if (l4_proto == ICE_FLOW_SEG_HDR_UDP) {
+		src_port = ICE_FLOW_FIELD_IDX_UDP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_UDP_DST_PORT;
+	} else {
+		return -EINVAL;
+	}
+
+	ICE_FLOW_SET_HDRS(seg, l3_proto | l4_proto);
+
+	/* IP source address */
+	ice_flow_set_fld(seg, src_addr, ICE_FLOW_FLD_OFF_INVAL,
+			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, false);
+
+	/* IP destination address */
+	ice_flow_set_fld(seg, dst_addr, ICE_FLOW_FLD_OFF_INVAL,
+			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, false);
+
+	/* Layer 4 source port */
+	ice_flow_set_fld(seg, src_port, ICE_FLOW_FLD_OFF_INVAL,
+			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, false);
+
+	/* Layer 4 destination port */
+	ice_flow_set_fld(seg, dst_port, ICE_FLOW_FLD_OFF_INVAL,
+			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, false);
+
+	return 0;
+}
+
+/**
+ * ice_create_init_fdir_rule
+ * @pf: PF structure
+ * @flow: filter enum
+ *
+ * Return error value or 0 on success.
+ */
+static int
+ice_create_init_fdir_rule(struct ice_pf *pf, enum ice_fltr_ptype flow)
+{
+	struct ice_flow_seg_info *seg, *tun_seg;
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	int ret;
+
+	/* if there is already a filter rule for kind return -EINVAL */
+	if (hw->fdir_prof && hw->fdir_prof[flow] &&
+	    hw->fdir_prof[flow]->fdir_seg[0])
+		return -EINVAL;
+
+	seg = devm_kzalloc(dev, sizeof(*seg), GFP_KERNEL);
+	if (!seg)
+		return -ENOMEM;
+
+	tun_seg = devm_kzalloc(dev, sizeof(*seg) * ICE_FD_HW_SEG_MAX,
+			       GFP_KERNEL);
+	if (!tun_seg) {
+		devm_kfree(dev, seg);
+		return -ENOMEM;
+	}
+
+	if (flow == ICE_FLTR_PTYPE_NONF_IPV4_TCP)
+		ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_IPV4,
+					    ICE_FLOW_SEG_HDR_TCP);
+	else if (flow == ICE_FLTR_PTYPE_NONF_IPV4_UDP)
+		ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_IPV4,
+					    ICE_FLOW_SEG_HDR_UDP);
+	else if (flow == ICE_FLTR_PTYPE_NONF_IPV6_TCP)
+		ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_IPV6,
+					    ICE_FLOW_SEG_HDR_TCP);
+	else if (flow == ICE_FLTR_PTYPE_NONF_IPV6_UDP)
+		ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_IPV6,
+					    ICE_FLOW_SEG_HDR_UDP);
+	else
+		ret = -EINVAL;
+	if (ret)
+		goto err_exit;
+
+	/* add filter for outer headers */
+	ret = ice_fdir_set_hw_fltr_rule(pf, seg, flow, ICE_FD_HW_SEG_NON_TUN);
+	if (ret)
+		/* could not write filter, free memory */
+		goto err_exit;
+
+	/* make tunneled filter HW entries if possible */
+	memcpy(&tun_seg[1], seg, sizeof(*seg));
+	ret = ice_fdir_set_hw_fltr_rule(pf, tun_seg, flow, ICE_FD_HW_SEG_TUN);
+	if (ret)
+		/* could not write tunnel filter, but outer header filter
+		 * exists
+		 */
+		devm_kfree(dev, tun_seg);
+
+	set_bit(flow, hw->fdir_perfect_fltr);
+	return ret;
+err_exit:
+	devm_kfree(dev, tun_seg);
+	devm_kfree(dev, seg);
+
+	return -EOPNOTSUPP;
+}
+
+/**
+ * ice_set_fdir_ip4_seg
+ * @seg: flow segment for programming
+ * @tcp_ip4_spec: mask data from ethtool
+ * @l4_proto: Layer 4 protocol to program
+ * @perfect_fltr: only valid on success; returns true if perfect filter,
+ *		  false if not
+ *
+ * Set the mask data into the flow segment to be used to program HW
+ * table based on provided L4 protocol for IPv4
+ */
+static int
+ice_set_fdir_ip4_seg(struct ice_flow_seg_info *seg,
+		     struct ethtool_tcpip4_spec *tcp_ip4_spec,
+		     enum ice_flow_seg_hdr l4_proto, bool *perfect_fltr)
+{
+	enum ice_flow_field src_port, dst_port;
+
+	/* make sure we don't have any empty rule */
+	if (!tcp_ip4_spec->psrc && !tcp_ip4_spec->ip4src &&
+	    !tcp_ip4_spec->pdst && !tcp_ip4_spec->ip4dst)
+		return -EINVAL;
+
+	/* filtering on TOS not supported */
+	if (tcp_ip4_spec->tos)
+		return -EOPNOTSUPP;
+
+	if (l4_proto == ICE_FLOW_SEG_HDR_TCP) {
+		src_port = ICE_FLOW_FIELD_IDX_TCP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_TCP_DST_PORT;
+	} else if (l4_proto == ICE_FLOW_SEG_HDR_UDP) {
+		src_port = ICE_FLOW_FIELD_IDX_UDP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_UDP_DST_PORT;
+	} else if (l4_proto == ICE_FLOW_SEG_HDR_SCTP) {
+		src_port = ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_SCTP_DST_PORT;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	*perfect_fltr = true;
+	ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4 | l4_proto);
+
+	/* IP source address */
+	if (tcp_ip4_spec->ip4src == htonl(0xFFFFFFFF))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_SA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!tcp_ip4_spec->ip4src)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* IP destination address */
+	if (tcp_ip4_spec->ip4dst == htonl(0xFFFFFFFF))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_DA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!tcp_ip4_spec->ip4dst)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* Layer 4 source port */
+	if (tcp_ip4_spec->psrc == htons(0xFFFF))
+		ice_flow_set_fld(seg, src_port, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 false);
+	else if (!tcp_ip4_spec->psrc)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* Layer 4 destination port */
+	if (tcp_ip4_spec->pdst == htons(0xFFFF))
+		ice_flow_set_fld(seg, dst_port, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 false);
+	else if (!tcp_ip4_spec->pdst)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/**
+ * ice_set_fdir_ip4_usr_seg
+ * @seg: flow segment for programming
+ * @usr_ip4_spec: ethtool userdef packet offset
+ * @perfect_fltr: only valid on success; returns true if perfect filter,
+ *		  false if not
+ *
+ * Set the offset data into the flow segment to be used to program HW
+ * table for IPv4
+ */
+static int
+ice_set_fdir_ip4_usr_seg(struct ice_flow_seg_info *seg,
+			 struct ethtool_usrip4_spec *usr_ip4_spec,
+			 bool *perfect_fltr)
+{
+	/* first 4 bytes of Layer 4 header */
+	if (usr_ip4_spec->l4_4_bytes)
+		return -EINVAL;
+	if (usr_ip4_spec->tos)
+		return -EINVAL;
+	if (usr_ip4_spec->ip_ver)
+		return -EINVAL;
+	/* Filtering on Layer 4 protocol not supported */
+	if (usr_ip4_spec->proto)
+		return -EOPNOTSUPP;
+	/* empty rules are not valid */
+	if (!usr_ip4_spec->ip4src && !usr_ip4_spec->ip4dst)
+		return -EINVAL;
+
+	*perfect_fltr = true;
+	ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4);
+
+	/* IP source address */
+	if (usr_ip4_spec->ip4src == htonl(0xFFFFFFFF))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_SA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!usr_ip4_spec->ip4src)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* IP destination address */
+	if (usr_ip4_spec->ip4dst == htonl(0xFFFFFFFF))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_DA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!usr_ip4_spec->ip4dst)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/**
+ * ice_set_fdir_ip6_seg
+ * @seg: flow segment for programming
+ * @tcp_ip6_spec: mask data from ethtool
+ * @l4_proto: Layer 4 protocol to program
+ * @perfect_fltr: only valid on success; returns true if perfect filter,
+ *		  false if not
+ *
+ * Set the mask data into the flow segment to be used to program HW
+ * table based on provided L4 protocol for IPv6
+ */
+static int
+ice_set_fdir_ip6_seg(struct ice_flow_seg_info *seg,
+		     struct ethtool_tcpip6_spec *tcp_ip6_spec,
+		     enum ice_flow_seg_hdr l4_proto, bool *perfect_fltr)
+{
+	enum ice_flow_field src_port, dst_port;
+
+	/* make sure we don't have any empty rule */
+	if (!memcmp(tcp_ip6_spec->ip6src, &zero_ipv6_addr_mask,
+		    sizeof(struct in6_addr)) &&
+	    !memcmp(tcp_ip6_spec->ip6dst, &zero_ipv6_addr_mask,
+		    sizeof(struct in6_addr)) &&
+	    !tcp_ip6_spec->psrc && !tcp_ip6_spec->pdst)
+		return -EINVAL;
+
+	/* filtering on TC not supported */
+	if (tcp_ip6_spec->tclass)
+		return -EOPNOTSUPP;
+
+	if (l4_proto == ICE_FLOW_SEG_HDR_TCP) {
+		src_port = ICE_FLOW_FIELD_IDX_TCP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_TCP_DST_PORT;
+	} else if (l4_proto == ICE_FLOW_SEG_HDR_UDP) {
+		src_port = ICE_FLOW_FIELD_IDX_UDP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_UDP_DST_PORT;
+	} else if (l4_proto == ICE_FLOW_SEG_HDR_SCTP) {
+		src_port = ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_SCTP_DST_PORT;
+	} else {
+		return -EINVAL;
+	}
+
+	*perfect_fltr = true;
+	ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV6 | l4_proto);
+
+	if (!memcmp(tcp_ip6_spec->ip6src, &full_ipv6_addr_mask,
+		    sizeof(struct in6_addr)))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV6_SA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!memcmp(tcp_ip6_spec->ip6src, &zero_ipv6_addr_mask,
+			 sizeof(struct in6_addr)))
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	if (!memcmp(tcp_ip6_spec->ip6dst, &full_ipv6_addr_mask,
+		    sizeof(struct in6_addr)))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV6_DA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!memcmp(tcp_ip6_spec->ip6dst, &zero_ipv6_addr_mask,
+			 sizeof(struct in6_addr)))
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* Layer 4 source port */
+	if (tcp_ip6_spec->psrc == htons(0xFFFF))
+		ice_flow_set_fld(seg, src_port, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 false);
+	else if (!tcp_ip6_spec->psrc)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* Layer 4 destination port */
+	if (tcp_ip6_spec->pdst == htons(0xFFFF))
+		ice_flow_set_fld(seg, dst_port, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 false);
+	else if (!tcp_ip6_spec->pdst)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/**
+ * ice_set_fdir_ip6_usr_seg
+ * @seg: flow segment for programming
+ * @usr_ip6_spec: ethtool userdef packet offset
+ * @perfect_fltr: only valid on success; returns true if perfect filter,
+ *		  false if not
+ *
+ * Set the offset data into the flow segment to be used to program HW
+ * table for IPv6
+ */
+static int
+ice_set_fdir_ip6_usr_seg(struct ice_flow_seg_info *seg,
+			 struct ethtool_usrip6_spec *usr_ip6_spec,
+			 bool *perfect_fltr)
+{
+	/* filtering on Layer 4 bytes not supported */
+	if (usr_ip6_spec->l4_4_bytes)
+		return -EOPNOTSUPP;
+	/* filtering on TC not supported */
+	if (usr_ip6_spec->tclass)
+		return -EOPNOTSUPP;
+	/* filtering on Layer 4 protocol not supported */
+	if (usr_ip6_spec->l4_proto)
+		return -EOPNOTSUPP;
+	/* empty rules are not valid */
+	if (!memcmp(usr_ip6_spec->ip6src, &zero_ipv6_addr_mask,
+		    sizeof(struct in6_addr)) &&
+	    !memcmp(usr_ip6_spec->ip6dst, &zero_ipv6_addr_mask,
+		    sizeof(struct in6_addr)))
+		return -EINVAL;
+
+	*perfect_fltr = true;
+	ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV6);
+
+	if (!memcmp(usr_ip6_spec->ip6src, &full_ipv6_addr_mask,
+		    sizeof(struct in6_addr)))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV6_SA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!memcmp(usr_ip6_spec->ip6src, &zero_ipv6_addr_mask,
+			 sizeof(struct in6_addr)))
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	if (!memcmp(usr_ip6_spec->ip6dst, &full_ipv6_addr_mask,
+		    sizeof(struct in6_addr)))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV6_DA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!memcmp(usr_ip6_spec->ip6dst, &zero_ipv6_addr_mask,
+			 sizeof(struct in6_addr)))
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/**
+ * ice_cfg_fdir_xtrct_seq - Configure extraction sequence for the given filter
+ * @pf: PF structure
+ * @fsp: pointer to ethtool Rx flow specification
+ * @user: user defined data from flow specification
+ *
+ * Returns 0 on success.
+ */
+static int
+ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp,
+		       struct ice_rx_flow_userdef *user)
+{
+	struct ice_flow_seg_info *seg, *tun_seg;
+	struct device *dev = ice_pf_to_dev(pf);
+	enum ice_fltr_ptype fltr_idx;
+	struct ice_hw *hw = &pf->hw;
+	bool perfect_filter;
+	int ret;
+
+	seg = devm_kzalloc(dev, sizeof(*seg), GFP_KERNEL);
+	if (!seg)
+		return -ENOMEM;
+
+	tun_seg = devm_kzalloc(dev, sizeof(*seg) * ICE_FD_HW_SEG_MAX,
+			       GFP_KERNEL);
+	if (!tun_seg) {
+		devm_kfree(dev, seg);
+		return -ENOMEM;
+	}
+
+	switch (fsp->flow_type & ~FLOW_EXT) {
+	case TCP_V4_FLOW:
+		ret = ice_set_fdir_ip4_seg(seg, &fsp->m_u.tcp_ip4_spec,
+					   ICE_FLOW_SEG_HDR_TCP,
+					   &perfect_filter);
+		break;
+	case UDP_V4_FLOW:
+		ret = ice_set_fdir_ip4_seg(seg, &fsp->m_u.tcp_ip4_spec,
+					   ICE_FLOW_SEG_HDR_UDP,
+					   &perfect_filter);
+		break;
+	case SCTP_V4_FLOW:
+		ret = ice_set_fdir_ip4_seg(seg, &fsp->m_u.tcp_ip4_spec,
+					   ICE_FLOW_SEG_HDR_SCTP,
+					   &perfect_filter);
+		break;
+	case IPV4_USER_FLOW:
+		ret = ice_set_fdir_ip4_usr_seg(seg, &fsp->m_u.usr_ip4_spec,
+					       &perfect_filter);
+		break;
+	case TCP_V6_FLOW:
+		ret = ice_set_fdir_ip6_seg(seg, &fsp->m_u.tcp_ip6_spec,
+					   ICE_FLOW_SEG_HDR_TCP,
+					   &perfect_filter);
+		break;
+	case UDP_V6_FLOW:
+		ret = ice_set_fdir_ip6_seg(seg, &fsp->m_u.tcp_ip6_spec,
+					   ICE_FLOW_SEG_HDR_UDP,
+					   &perfect_filter);
+		break;
+	case SCTP_V6_FLOW:
+		ret = ice_set_fdir_ip6_seg(seg, &fsp->m_u.tcp_ip6_spec,
+					   ICE_FLOW_SEG_HDR_SCTP,
+					   &perfect_filter);
+		break;
+	case IPV6_USER_FLOW:
+		ret = ice_set_fdir_ip6_usr_seg(seg, &fsp->m_u.usr_ip6_spec,
+					       &perfect_filter);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	if (ret)
+		goto err_exit;
+
+	/* tunnel segments are shifted up one. */
+	memcpy(&tun_seg[1], seg, sizeof(*seg));
+
+	if (user && user->flex_fltr) {
+		perfect_filter = false;
+		ice_flow_add_fld_raw(seg, user->flex_offset,
+				     ICE_FLTR_PRGM_FLEX_WORD_SIZE,
+				     ICE_FLOW_FLD_OFF_INVAL,
+				     ICE_FLOW_FLD_OFF_INVAL);
+		ice_flow_add_fld_raw(&tun_seg[1], user->flex_offset,
+				     ICE_FLTR_PRGM_FLEX_WORD_SIZE,
+				     ICE_FLOW_FLD_OFF_INVAL,
+				     ICE_FLOW_FLD_OFF_INVAL);
+	}
+
+	fltr_idx = ice_ethtool_flow_to_fltr(fsp->flow_type & ~FLOW_EXT);
+
+	assign_bit(fltr_idx, hw->fdir_perfect_fltr, perfect_filter);
+
+	/* add filter for outer headers */
+	ret = ice_fdir_set_hw_fltr_rule(pf, seg, fltr_idx,
+					ICE_FD_HW_SEG_NON_TUN);
+	if (ret == -EEXIST) {
+		/* Rule already exists, free memory and count as success */
+		ret = 0;
+		goto err_exit;
+	} else if (ret) {
+		/* could not write filter, free memory */
+		goto err_exit;
+	}
+
+	/* make tunneled filter HW entries if possible */
+	memcpy(&tun_seg[1], seg, sizeof(*seg));
+	ret = ice_fdir_set_hw_fltr_rule(pf, tun_seg, fltr_idx,
+					ICE_FD_HW_SEG_TUN);
+	if (ret == -EEXIST) {
+		/* Rule already exists, free memory and count as success */
+		devm_kfree(dev, tun_seg);
+		ret = 0;
+	} else if (ret) {
+		/* could not write tunnel filter, but outer filter exists */
+		devm_kfree(dev, tun_seg);
+	}
+
+	return ret;
+
+err_exit:
+	devm_kfree(dev, tun_seg);
+	devm_kfree(dev, seg);
+
+	return ret;
+}
+
+/**
+ * ice_fdir_write_fltr - send a flow director filter to the hardware
+ * @pf: PF data structure
+ * @input: filter structure
+ * @add: true adds filter and false removed filter
+ * @is_tun: true adds inner filter on tunnel and false outer headers
+ *
+ * returns 0 on success and negative value on error
+ */
+int
+ice_fdir_write_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool add,
+		    bool is_tun)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	struct ice_fltr_desc desc;
+	struct ice_vsi *ctrl_vsi;
+	enum ice_status status;
+	u8 *pkt, *frag_pkt;
+	bool has_frag;
+	int err;
+
+	ctrl_vsi = ice_get_ctrl_vsi(pf);
+	if (!ctrl_vsi)
+		return -EINVAL;
+
+	pkt = devm_kzalloc(dev, ICE_FDIR_MAX_RAW_PKT_SIZE, GFP_KERNEL);
+	if (!pkt)
+		return -ENOMEM;
+	frag_pkt = devm_kzalloc(dev, ICE_FDIR_MAX_RAW_PKT_SIZE, GFP_KERNEL);
+	if (!frag_pkt) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	ice_fdir_get_prgm_desc(hw, input, &desc, add);
+	status = ice_fdir_get_gen_prgm_pkt(hw, input, pkt, false, is_tun);
+	if (status) {
+		err = ice_status_to_errno(status);
+		goto err_free_all;
+	}
+	err = ice_prgm_fdir_fltr(ctrl_vsi, &desc, pkt);
+	if (err)
+		goto err_free_all;
+
+	/* repeat for fragment packet */
+	has_frag = ice_fdir_has_frag(input->flow_type);
+	if (has_frag) {
+		/* does not return error */
+		ice_fdir_get_prgm_desc(hw, input, &desc, add);
+		status = ice_fdir_get_gen_prgm_pkt(hw, input, frag_pkt, true,
+						   is_tun);
+		if (status) {
+			err = ice_status_to_errno(status);
+			goto err_frag;
+		}
+		err = ice_prgm_fdir_fltr(ctrl_vsi, &desc, frag_pkt);
+		if (err)
+			goto err_frag;
+	} else {
+		devm_kfree(dev, frag_pkt);
+	}
+
+	return 0;
+
+err_free_all:
+	devm_kfree(dev, frag_pkt);
+err_free:
+	devm_kfree(dev, pkt);
+	return err;
+
+err_frag:
+	devm_kfree(dev, frag_pkt);
+	return err;
+}
+
+/**
+ * ice_fdir_write_all_fltr - send a flow director filter to the hardware
+ * @pf: PF data structure
+ * @input: filter structure
+ * @add: true adds filter and false removed filter
+ *
+ * returns 0 on success and negative value on error
+ */
+static int
+ice_fdir_write_all_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input,
+			bool add)
+{
+	u16 port_num;
+	int tun;
+
+	for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+		bool is_tun = tun == ICE_FD_HW_SEG_TUN;
+		int err;
+
+		if (is_tun && !ice_get_open_tunnel_port(&pf->hw, &port_num))
+			continue;
+		err = ice_fdir_write_fltr(pf, input, add, is_tun);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/**
+ * ice_fdir_replay_fltrs - replay filters from the HW filter list
+ * @pf: board private structure
+ */
+void ice_fdir_replay_fltrs(struct ice_pf *pf)
+{
+	struct ice_fdir_fltr *f_rule;
+	struct ice_hw *hw = &pf->hw;
+
+	list_for_each_entry(f_rule, &hw->fdir_list_head, fltr_node) {
+		int err = ice_fdir_write_all_fltr(pf, f_rule, true);
+
+		if (err)
+			dev_dbg(ice_pf_to_dev(pf), "Flow Director error %d, could not reprogram filter %d\n",
+				err, f_rule->fltr_id);
+	}
+}
+
+/**
+ * ice_fdir_create_dflt_rules - create default perfect filters
+ * @pf: PF data structure
+ *
+ * Returns 0 for success or error.
+ */
+int ice_fdir_create_dflt_rules(struct ice_pf *pf)
+{
+	int err;
+
+	/* Create perfect TCP and UDP rules in hardware. */
+	err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV4_TCP);
+	if (err)
+		return err;
+
+	err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV4_UDP);
+	if (err)
+		return err;
+
+	err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV6_TCP);
+	if (err)
+		return err;
+
+	err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV6_UDP);
+
+	return err;
+}
+
+/**
+ * ice_vsi_manage_fdir - turn on/off flow director
+ * @vsi: the VSI being changed
+ * @ena: boolean value indicating if this is an enable or disable request
+ */
+void ice_vsi_manage_fdir(struct ice_vsi *vsi, bool ena)
+{
+	struct ice_fdir_fltr *f_rule, *tmp;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_fltr_ptype flow;
+
+	if (ena) {
+		set_bit(ICE_FLAG_FD_ENA, pf->flags);
+		ice_fdir_create_dflt_rules(pf);
+		return;
+	}
+
+	mutex_lock(&hw->fdir_fltr_lock);
+	if (!test_and_clear_bit(ICE_FLAG_FD_ENA, pf->flags))
+		goto release_lock;
+	list_for_each_entry_safe(f_rule, tmp, &hw->fdir_list_head, fltr_node) {
+		/* ignore return value */
+		ice_fdir_write_all_fltr(pf, f_rule, false);
+		ice_fdir_update_cntrs(hw, f_rule->flow_type, false);
+		list_del(&f_rule->fltr_node);
+		devm_kfree(ice_hw_to_dev(hw), f_rule);
+	}
+
+	if (hw->fdir_prof)
+		for (flow = ICE_FLTR_PTYPE_NONF_NONE; flow < ICE_FLTR_PTYPE_MAX;
+		     flow++)
+			if (hw->fdir_prof[flow])
+				ice_fdir_rem_flow(hw, ICE_BLK_FD, flow);
+
+release_lock:
+	mutex_unlock(&hw->fdir_fltr_lock);
+}
+
+/**
+ * ice_fdir_do_rem_flow - delete flow and possibly add perfect flow
+ * @pf: PF structure
+ * @flow_type: FDir flow type to release
+ */
+static void
+ice_fdir_do_rem_flow(struct ice_pf *pf, enum ice_fltr_ptype flow_type)
+{
+	struct ice_hw *hw = &pf->hw;
+	bool need_perfect = false;
+
+	if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
+	    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+	    flow_type == ICE_FLTR_PTYPE_NONF_IPV6_TCP ||
+	    flow_type == ICE_FLTR_PTYPE_NONF_IPV6_UDP)
+		need_perfect = true;
+
+	if (need_perfect && test_bit(flow_type, hw->fdir_perfect_fltr))
+		return;
+
+	ice_fdir_rem_flow(hw, ICE_BLK_FD, flow_type);
+	if (need_perfect)
+		ice_create_init_fdir_rule(pf, flow_type);
+}
+
+/**
+ * ice_fdir_update_list_entry - add or delete a filter from the filter list
+ * @pf: PF structure
+ * @input: filter structure
+ * @fltr_idx: ethtool index of filter to modify
+ *
+ * returns 0 on success and negative on errors
+ */
+static int
+ice_fdir_update_list_entry(struct ice_pf *pf, struct ice_fdir_fltr *input,
+			   int fltr_idx)
+{
+	struct ice_fdir_fltr *old_fltr;
+	struct ice_hw *hw = &pf->hw;
+	int err = -ENOENT;
+
+	/* Do not update filters during reset */
+	if (ice_is_reset_in_progress(pf->state))
+		return -EBUSY;
+
+	old_fltr = ice_fdir_find_fltr_by_idx(hw, fltr_idx);
+	if (old_fltr) {
+		err = ice_fdir_write_all_fltr(pf, old_fltr, false);
+		if (err)
+			return err;
+		ice_fdir_update_cntrs(hw, old_fltr->flow_type, false);
+		if (!input && !hw->fdir_fltr_cnt[old_fltr->flow_type])
+			/* we just deleted the last filter of flow_type so we
+			 * should also delete the HW filter info.
+			 */
+			ice_fdir_do_rem_flow(pf, old_fltr->flow_type);
+		list_del(&old_fltr->fltr_node);
+		devm_kfree(ice_hw_to_dev(hw), old_fltr);
+	}
+	if (!input)
+		return err;
+	ice_fdir_list_add_fltr(hw, input);
+	ice_fdir_update_cntrs(hw, input->flow_type, true);
+	return 0;
+}
+
+/**
+ * ice_del_fdir_ethtool - delete Flow Director filter
+ * @vsi: pointer to target VSI
+ * @cmd: command to add or delete Flow Director filter
+ *
+ * Returns 0 on success and negative values for failure
+ */
+int ice_del_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	int val;
+
+	if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
+		return -EOPNOTSUPP;
+
+	/* Do not delete filters during reset */
+	if (ice_is_reset_in_progress(pf->state)) {
+		dev_err(ice_pf_to_dev(pf), "Device is resetting - deleting Flow Director filters not supported during reset\n");
+		return -EBUSY;
+	}
+
+	if (test_bit(__ICE_FD_FLUSH_REQ, pf->state))
+		return -EBUSY;
+
+	mutex_lock(&hw->fdir_fltr_lock);
+	val = ice_fdir_update_list_entry(pf, NULL, fsp->location);
+	mutex_unlock(&hw->fdir_fltr_lock);
+
+	return val;
+}
+
+/**
+ * ice_set_fdir_input_set - Set the input set for Flow Director
+ * @vsi: pointer to target VSI
+ * @fsp: pointer to ethtool Rx flow specification
+ * @input: filter structure
+ */
+static int
+ice_set_fdir_input_set(struct ice_vsi *vsi, struct ethtool_rx_flow_spec *fsp,
+		       struct ice_fdir_fltr *input)
+{
+	u16 dest_vsi, q_index = 0;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	int flow_type;
+	u8 dest_ctl;
+
+	if (!vsi || !fsp || !input)
+		return -EINVAL;
+
+	pf = vsi->back;
+	hw = &pf->hw;
+
+	dest_vsi = vsi->idx;
+	if (fsp->ring_cookie == RX_CLS_FLOW_DISC) {
+		dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DROP_PKT;
+	} else {
+		u32 ring = ethtool_get_flow_spec_ring(fsp->ring_cookie);
+		u8 vf = ethtool_get_flow_spec_ring_vf(fsp->ring_cookie);
+
+		if (vf) {
+			dev_err(ice_pf_to_dev(pf), "Failed to add filter. Flow director filters are not supported on VF queues.\n");
+			return -EINVAL;
+		}
+
+		if (ring >= vsi->num_rxq)
+			return -EINVAL;
+
+		dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX;
+		q_index = ring;
+	}
+
+	input->fltr_id = fsp->location;
+	input->q_index = q_index;
+	flow_type = fsp->flow_type & ~FLOW_EXT;
+
+	input->dest_vsi = dest_vsi;
+	input->dest_ctl = dest_ctl;
+	input->fltr_status = ICE_FLTR_PRGM_DESC_FD_STATUS_FD_ID;
+	input->cnt_index = ICE_FD_SB_STAT_IDX(hw->fd_ctr_base);
+	input->flow_type = ice_ethtool_flow_to_fltr(flow_type);
+
+	if (fsp->flow_type & FLOW_EXT) {
+		memcpy(input->ext_data.usr_def, fsp->h_ext.data,
+		       sizeof(input->ext_data.usr_def));
+		input->ext_data.vlan_type = fsp->h_ext.vlan_etype;
+		input->ext_data.vlan_tag = fsp->h_ext.vlan_tci;
+		memcpy(input->ext_mask.usr_def, fsp->m_ext.data,
+		       sizeof(input->ext_mask.usr_def));
+		input->ext_mask.vlan_type = fsp->m_ext.vlan_etype;
+		input->ext_mask.vlan_tag = fsp->m_ext.vlan_tci;
+	}
+
+	switch (flow_type) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case SCTP_V4_FLOW:
+		input->ip.v4.dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+		input->ip.v4.src_port = fsp->h_u.tcp_ip4_spec.psrc;
+		input->ip.v4.dst_ip = fsp->h_u.tcp_ip4_spec.ip4dst;
+		input->ip.v4.src_ip = fsp->h_u.tcp_ip4_spec.ip4src;
+		input->mask.v4.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
+		input->mask.v4.src_port = fsp->m_u.tcp_ip4_spec.psrc;
+		input->mask.v4.dst_ip = fsp->m_u.tcp_ip4_spec.ip4dst;
+		input->mask.v4.src_ip = fsp->m_u.tcp_ip4_spec.ip4src;
+		break;
+	case IPV4_USER_FLOW:
+		input->ip.v4.dst_ip = fsp->h_u.usr_ip4_spec.ip4dst;
+		input->ip.v4.src_ip = fsp->h_u.usr_ip4_spec.ip4src;
+		input->ip.v4.l4_header = fsp->h_u.usr_ip4_spec.l4_4_bytes;
+		input->ip.v4.proto = fsp->h_u.usr_ip4_spec.proto;
+		input->ip.v4.ip_ver = fsp->h_u.usr_ip4_spec.ip_ver;
+		input->ip.v4.tos = fsp->h_u.usr_ip4_spec.tos;
+		input->mask.v4.dst_ip = fsp->m_u.usr_ip4_spec.ip4dst;
+		input->mask.v4.src_ip = fsp->m_u.usr_ip4_spec.ip4src;
+		input->mask.v4.l4_header = fsp->m_u.usr_ip4_spec.l4_4_bytes;
+		input->mask.v4.proto = fsp->m_u.usr_ip4_spec.proto;
+		input->mask.v4.ip_ver = fsp->m_u.usr_ip4_spec.ip_ver;
+		input->mask.v4.tos = fsp->m_u.usr_ip4_spec.tos;
+		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		memcpy(input->ip.v6.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		memcpy(input->ip.v6.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		input->ip.v6.dst_port = fsp->h_u.tcp_ip6_spec.pdst;
+		input->ip.v6.src_port = fsp->h_u.tcp_ip6_spec.psrc;
+		input->ip.v6.tc = fsp->h_u.tcp_ip6_spec.tclass;
+		memcpy(input->mask.v6.dst_ip, fsp->m_u.tcp_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		memcpy(input->mask.v6.src_ip, fsp->m_u.tcp_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		input->mask.v6.dst_port = fsp->m_u.tcp_ip6_spec.pdst;
+		input->mask.v6.src_port = fsp->m_u.tcp_ip6_spec.psrc;
+		input->mask.v6.tc = fsp->m_u.tcp_ip6_spec.tclass;
+		break;
+	case IPV6_USER_FLOW:
+		memcpy(input->ip.v6.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		memcpy(input->ip.v6.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		input->ip.v6.l4_header = fsp->h_u.usr_ip6_spec.l4_4_bytes;
+		input->ip.v6.tc = fsp->h_u.usr_ip6_spec.tclass;
+
+		/* if no protocol requested, use IPPROTO_NONE */
+		if (!fsp->m_u.usr_ip6_spec.l4_proto)
+			input->ip.v6.proto = IPPROTO_NONE;
+		else
+			input->ip.v6.proto = fsp->h_u.usr_ip6_spec.l4_proto;
+
+		memcpy(input->mask.v6.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		memcpy(input->mask.v6.src_ip, fsp->m_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		input->mask.v6.l4_header = fsp->m_u.usr_ip6_spec.l4_4_bytes;
+		input->mask.v6.tc = fsp->m_u.usr_ip6_spec.tclass;
+		input->mask.v6.proto = fsp->m_u.usr_ip6_spec.l4_proto;
+		break;
+	default:
+		/* not doing un-parsed flow types */
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_add_fdir_ethtool - Add/Remove Flow Director filter
+ * @vsi: pointer to target VSI
+ * @cmd: command to add or delete Flow Director filter
+ *
+ * Returns 0 on success and negative values for failure
+ */
+int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
+{
+	struct ice_rx_flow_userdef userdata;
+	struct ethtool_rx_flow_spec *fsp;
+	struct ice_fdir_fltr *input;
+	struct device *dev;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	int fltrs_needed;
+	u16 tunnel_port;
+	int ret;
+
+	if (!vsi)
+		return -EINVAL;
+
+	pf = vsi->back;
+	hw = &pf->hw;
+	dev = ice_pf_to_dev(pf);
+
+	if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
+		return -EOPNOTSUPP;
+
+	/* Do not program filters during reset */
+	if (ice_is_reset_in_progress(pf->state)) {
+		dev_err(dev, "Device is resetting - adding Flow Director filters not supported during reset\n");
+		return -EBUSY;
+	}
+
+	fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+	if (ice_parse_rx_flow_user_data(fsp, &userdata))
+		return -EINVAL;
+
+	if (fsp->flow_type & FLOW_MAC_EXT)
+		return -EINVAL;
+
+	ret = ice_cfg_fdir_xtrct_seq(pf, fsp, &userdata);
+	if (ret)
+		return ret;
+
+	if (fsp->location >= ice_get_fdir_cnt_all(hw)) {
+		dev_err(dev, "Failed to add filter.  The maximum number of flow director filters has been reached.\n");
+		return -ENOSPC;
+	}
+
+	/* return error if not an update and no available filters */
+	fltrs_needed = ice_get_open_tunnel_port(hw, &tunnel_port) ? 2 : 1;
+	if (!ice_fdir_find_fltr_by_idx(hw, fsp->location) &&
+	    ice_fdir_num_avail_fltr(hw, pf->vsi[vsi->idx]) < fltrs_needed) {
+		dev_err(dev, "Failed to add filter.  The maximum number of flow director filters has been reached.\n");
+		return -ENOSPC;
+	}
+
+	input = devm_kzalloc(dev, sizeof(*input), GFP_KERNEL);
+	if (!input)
+		return -ENOMEM;
+
+	ret = ice_set_fdir_input_set(vsi, fsp, input);
+	if (ret)
+		goto free_input;
+
+	mutex_lock(&hw->fdir_fltr_lock);
+	if (ice_fdir_is_dup_fltr(hw, input)) {
+		ret = -EINVAL;
+		goto release_lock;
+	}
+
+	if (userdata.flex_fltr) {
+		input->flex_fltr = true;
+		input->flex_word = cpu_to_be16(userdata.flex_word);
+		input->flex_offset = userdata.flex_offset;
+	}
+
+	/* input struct is added to the HW filter list */
+	ret = ice_fdir_update_list_entry(pf, input, fsp->location);
+	if (ret)
+		goto release_lock;
+
+	ret = ice_fdir_write_all_fltr(pf, input, true);
+	if (ret)
+		goto remove_sw_rule;
+
+	goto release_lock;
+
+remove_sw_rule:
+	ice_fdir_update_cntrs(hw, input->flow_type, false);
+	list_del(&input->fltr_node);
+release_lock:
+	mutex_unlock(&hw->fdir_fltr_lock);
+free_input:
+	if (ret)
+		devm_kfree(dev, input);
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c
new file mode 100644
index 000000000..59c0c6a0f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.c
@@ -0,0 +1,840 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#include "ice_common.h"
+
+/* These are training packet headers used to program flow director filters. */
+static const u8 ice_fdir_tcpv4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x28, 0x00, 0x01, 0x00, 0x00, 0x40, 0x06,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00,
+	0x20, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+static const u8 ice_fdir_udpv4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x1C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_sctpv4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x20, 0x00, 0x00, 0x40, 0x00, 0x40, 0x84,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x10,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00
+};
+
+static const u8 ice_fdir_tcpv6_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x14, 0x06, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x50, 0x00, 0x20, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_udpv6_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x08, 0x11, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x08, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_sctpv6_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x0C, 0x84, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x3B, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_tcp4_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x5a, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00,
+	0x45, 0x00, 0x00, 0x28, 0x00, 0x00, 0x40, 0x00,
+	0x40, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x50, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_udp4_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x4e, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00,
+	0x45, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x40, 0x00,
+	0x40, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_sctp4_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x52, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00,
+	0x45, 0x00, 0x00, 0x20, 0x00, 0x01, 0x00, 0x00,
+	0x40, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ip4_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x46, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00,
+	0x45, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00,
+	0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_tcp6_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x6e, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xdd,
+	0x60, 0x00, 0x00, 0x00, 0x00, 0x14, 0x06, 0x40,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x20, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_udp6_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x62, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xdd,
+	0x60, 0x00, 0x00, 0x00, 0x00, 0x08, 0x11, 0x40,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_sctp6_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x66, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xdd,
+	0x60, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x84, 0x40,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ip6_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x5a, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xdd,
+	0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3b, 0x40,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+/* Flow Director no-op training packet table */
+static const struct ice_fdir_base_pkt ice_fdir_pkt[] = {
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_TCP,
+		sizeof(ice_fdir_tcpv4_pkt), ice_fdir_tcpv4_pkt,
+		sizeof(ice_fdir_tcp4_tun_pkt), ice_fdir_tcp4_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_UDP,
+		sizeof(ice_fdir_udpv4_pkt), ice_fdir_udpv4_pkt,
+		sizeof(ice_fdir_udp4_tun_pkt), ice_fdir_udp4_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_SCTP,
+		sizeof(ice_fdir_sctpv4_pkt), ice_fdir_sctpv4_pkt,
+		sizeof(ice_fdir_sctp4_tun_pkt), ice_fdir_sctp4_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_OTHER,
+		sizeof(ice_fdir_ipv4_pkt), ice_fdir_ipv4_pkt,
+		sizeof(ice_fdir_ip4_tun_pkt), ice_fdir_ip4_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_TCP,
+		sizeof(ice_fdir_tcpv6_pkt), ice_fdir_tcpv6_pkt,
+		sizeof(ice_fdir_tcp6_tun_pkt), ice_fdir_tcp6_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_UDP,
+		sizeof(ice_fdir_udpv6_pkt), ice_fdir_udpv6_pkt,
+		sizeof(ice_fdir_udp6_tun_pkt), ice_fdir_udp6_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_SCTP,
+		sizeof(ice_fdir_sctpv6_pkt), ice_fdir_sctpv6_pkt,
+		sizeof(ice_fdir_sctp6_tun_pkt), ice_fdir_sctp6_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_OTHER,
+		sizeof(ice_fdir_ipv6_pkt), ice_fdir_ipv6_pkt,
+		sizeof(ice_fdir_ip6_tun_pkt), ice_fdir_ip6_tun_pkt,
+	},
+};
+
+#define ICE_FDIR_NUM_PKT ARRAY_SIZE(ice_fdir_pkt)
+
+/**
+ * ice_set_dflt_val_fd_desc
+ * @fd_fltr_ctx: pointer to fd filter descriptor
+ */
+static void ice_set_dflt_val_fd_desc(struct ice_fd_fltr_desc_ctx *fd_fltr_ctx)
+{
+	fd_fltr_ctx->comp_q = ICE_FXD_FLTR_QW0_COMP_Q_ZERO;
+	fd_fltr_ctx->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL;
+	fd_fltr_ctx->fd_space = ICE_FXD_FLTR_QW0_FD_SPACE_GUAR_BEST;
+	fd_fltr_ctx->cnt_ena = ICE_FXD_FLTR_QW0_STAT_ENA_PKTS;
+	fd_fltr_ctx->evict_ena = ICE_FXD_FLTR_QW0_EVICT_ENA_TRUE;
+	fd_fltr_ctx->toq = ICE_FXD_FLTR_QW0_TO_Q_EQUALS_QINDEX;
+	fd_fltr_ctx->toq_prio = ICE_FXD_FLTR_QW0_TO_Q_PRIO1;
+	fd_fltr_ctx->dpu_recipe = ICE_FXD_FLTR_QW0_DPU_RECIPE_DFLT;
+	fd_fltr_ctx->drop = ICE_FXD_FLTR_QW0_DROP_NO;
+	fd_fltr_ctx->flex_prio = ICE_FXD_FLTR_QW0_FLEX_PRI_NONE;
+	fd_fltr_ctx->flex_mdid = ICE_FXD_FLTR_QW0_FLEX_MDID0;
+	fd_fltr_ctx->flex_val = ICE_FXD_FLTR_QW0_FLEX_VAL0;
+	fd_fltr_ctx->dtype = ICE_TX_DESC_DTYPE_FLTR_PROG;
+	fd_fltr_ctx->desc_prof_prio = ICE_FXD_FLTR_QW1_PROF_PRIO_ZERO;
+	fd_fltr_ctx->desc_prof = ICE_FXD_FLTR_QW1_PROF_ZERO;
+	fd_fltr_ctx->swap = ICE_FXD_FLTR_QW1_SWAP_SET;
+	fd_fltr_ctx->fdid_prio = ICE_FXD_FLTR_QW1_FDID_PRI_ONE;
+	fd_fltr_ctx->fdid_mdid = ICE_FXD_FLTR_QW1_FDID_MDID_FD;
+	fd_fltr_ctx->fdid = ICE_FXD_FLTR_QW1_FDID_ZERO;
+}
+
+/**
+ * ice_set_fd_desc_val
+ * @ctx: pointer to fd filter descriptor context
+ * @fdir_desc: populated with fd filter descriptor values
+ */
+static void
+ice_set_fd_desc_val(struct ice_fd_fltr_desc_ctx *ctx,
+		    struct ice_fltr_desc *fdir_desc)
+{
+	u64 qword;
+
+	/* prep QW0 of FD filter programming desc */
+	qword = ((u64)ctx->qindex << ICE_FXD_FLTR_QW0_QINDEX_S) &
+		ICE_FXD_FLTR_QW0_QINDEX_M;
+	qword |= ((u64)ctx->comp_q << ICE_FXD_FLTR_QW0_COMP_Q_S) &
+		 ICE_FXD_FLTR_QW0_COMP_Q_M;
+	qword |= ((u64)ctx->comp_report << ICE_FXD_FLTR_QW0_COMP_REPORT_S) &
+		 ICE_FXD_FLTR_QW0_COMP_REPORT_M;
+	qword |= ((u64)ctx->fd_space << ICE_FXD_FLTR_QW0_FD_SPACE_S) &
+		 ICE_FXD_FLTR_QW0_FD_SPACE_M;
+	qword |= ((u64)ctx->cnt_index << ICE_FXD_FLTR_QW0_STAT_CNT_S) &
+		 ICE_FXD_FLTR_QW0_STAT_CNT_M;
+	qword |= ((u64)ctx->cnt_ena << ICE_FXD_FLTR_QW0_STAT_ENA_S) &
+		 ICE_FXD_FLTR_QW0_STAT_ENA_M;
+	qword |= ((u64)ctx->evict_ena << ICE_FXD_FLTR_QW0_EVICT_ENA_S) &
+		 ICE_FXD_FLTR_QW0_EVICT_ENA_M;
+	qword |= ((u64)ctx->toq << ICE_FXD_FLTR_QW0_TO_Q_S) &
+		 ICE_FXD_FLTR_QW0_TO_Q_M;
+	qword |= ((u64)ctx->toq_prio << ICE_FXD_FLTR_QW0_TO_Q_PRI_S) &
+		 ICE_FXD_FLTR_QW0_TO_Q_PRI_M;
+	qword |= ((u64)ctx->dpu_recipe << ICE_FXD_FLTR_QW0_DPU_RECIPE_S) &
+		 ICE_FXD_FLTR_QW0_DPU_RECIPE_M;
+	qword |= ((u64)ctx->drop << ICE_FXD_FLTR_QW0_DROP_S) &
+		 ICE_FXD_FLTR_QW0_DROP_M;
+	qword |= ((u64)ctx->flex_prio << ICE_FXD_FLTR_QW0_FLEX_PRI_S) &
+		 ICE_FXD_FLTR_QW0_FLEX_PRI_M;
+	qword |= ((u64)ctx->flex_mdid << ICE_FXD_FLTR_QW0_FLEX_MDID_S) &
+		 ICE_FXD_FLTR_QW0_FLEX_MDID_M;
+	qword |= ((u64)ctx->flex_val << ICE_FXD_FLTR_QW0_FLEX_VAL_S) &
+		 ICE_FXD_FLTR_QW0_FLEX_VAL_M;
+	fdir_desc->qidx_compq_space_stat = cpu_to_le64(qword);
+
+	/* prep QW1 of FD filter programming desc */
+	qword = ((u64)ctx->dtype << ICE_FXD_FLTR_QW1_DTYPE_S) &
+		ICE_FXD_FLTR_QW1_DTYPE_M;
+	qword |= ((u64)ctx->pcmd << ICE_FXD_FLTR_QW1_PCMD_S) &
+		 ICE_FXD_FLTR_QW1_PCMD_M;
+	qword |= ((u64)ctx->desc_prof_prio << ICE_FXD_FLTR_QW1_PROF_PRI_S) &
+		 ICE_FXD_FLTR_QW1_PROF_PRI_M;
+	qword |= ((u64)ctx->desc_prof << ICE_FXD_FLTR_QW1_PROF_S) &
+		 ICE_FXD_FLTR_QW1_PROF_M;
+	qword |= ((u64)ctx->fd_vsi << ICE_FXD_FLTR_QW1_FD_VSI_S) &
+		 ICE_FXD_FLTR_QW1_FD_VSI_M;
+	qword |= ((u64)ctx->swap << ICE_FXD_FLTR_QW1_SWAP_S) &
+		 ICE_FXD_FLTR_QW1_SWAP_M;
+	qword |= ((u64)ctx->fdid_prio << ICE_FXD_FLTR_QW1_FDID_PRI_S) &
+		 ICE_FXD_FLTR_QW1_FDID_PRI_M;
+	qword |= ((u64)ctx->fdid_mdid << ICE_FXD_FLTR_QW1_FDID_MDID_S) &
+		 ICE_FXD_FLTR_QW1_FDID_MDID_M;
+	qword |= ((u64)ctx->fdid << ICE_FXD_FLTR_QW1_FDID_S) &
+		 ICE_FXD_FLTR_QW1_FDID_M;
+	fdir_desc->dtype_cmd_vsi_fdid = cpu_to_le64(qword);
+}
+
+/**
+ * ice_fdir_get_prgm_desc - set a fdir descriptor from a fdir filter struct
+ * @hw: pointer to the hardware structure
+ * @input: filter
+ * @fdesc: filter descriptor
+ * @add: if add is true, this is an add operation, false implies delete
+ */
+void
+ice_fdir_get_prgm_desc(struct ice_hw *hw, struct ice_fdir_fltr *input,
+		       struct ice_fltr_desc *fdesc, bool add)
+{
+	struct ice_fd_fltr_desc_ctx fdir_fltr_ctx = { 0 };
+
+	/* set default context info */
+	ice_set_dflt_val_fd_desc(&fdir_fltr_ctx);
+
+	/* change sideband filtering values */
+	fdir_fltr_ctx.fdid = input->fltr_id;
+	if (input->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DROP_PKT) {
+		fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_YES;
+		fdir_fltr_ctx.qindex = 0;
+	} else {
+		fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_NO;
+		fdir_fltr_ctx.qindex = input->q_index;
+	}
+	fdir_fltr_ctx.cnt_ena = ICE_FXD_FLTR_QW0_STAT_ENA_PKTS;
+	fdir_fltr_ctx.cnt_index = input->cnt_index;
+	fdir_fltr_ctx.fd_vsi = ice_get_hw_vsi_num(hw, input->dest_vsi);
+	fdir_fltr_ctx.evict_ena = ICE_FXD_FLTR_QW0_EVICT_ENA_FALSE;
+	fdir_fltr_ctx.toq_prio = 3;
+	fdir_fltr_ctx.pcmd = add ? ICE_FXD_FLTR_QW1_PCMD_ADD :
+		ICE_FXD_FLTR_QW1_PCMD_REMOVE;
+	fdir_fltr_ctx.swap = ICE_FXD_FLTR_QW1_SWAP_NOT_SET;
+	fdir_fltr_ctx.comp_q = ICE_FXD_FLTR_QW0_COMP_Q_ZERO;
+	fdir_fltr_ctx.comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL;
+	fdir_fltr_ctx.fdid_prio = 3;
+	fdir_fltr_ctx.desc_prof = 1;
+	fdir_fltr_ctx.desc_prof_prio = 3;
+	ice_set_fd_desc_val(&fdir_fltr_ctx, fdesc);
+}
+
+/**
+ * ice_alloc_fd_res_cntr - obtain counter resource for FD type
+ * @hw: pointer to the hardware structure
+ * @cntr_id: returns counter index
+ */
+enum ice_status ice_alloc_fd_res_cntr(struct ice_hw *hw, u16 *cntr_id)
+{
+	return ice_alloc_res_cntr(hw, ICE_AQC_RES_TYPE_FDIR_COUNTER_BLOCK,
+				  ICE_AQC_RES_TYPE_FLAG_DEDICATED, 1, cntr_id);
+}
+
+/**
+ * ice_free_fd_res_cntr - Free counter resource for FD type
+ * @hw: pointer to the hardware structure
+ * @cntr_id: counter index to be freed
+ */
+enum ice_status ice_free_fd_res_cntr(struct ice_hw *hw, u16 cntr_id)
+{
+	return ice_free_res_cntr(hw, ICE_AQC_RES_TYPE_FDIR_COUNTER_BLOCK,
+				 ICE_AQC_RES_TYPE_FLAG_DEDICATED, 1, cntr_id);
+}
+
+/**
+ * ice_alloc_fd_guar_item - allocate resource for FD guaranteed entries
+ * @hw: pointer to the hardware structure
+ * @cntr_id: returns counter index
+ * @num_fltr: number of filter entries to be allocated
+ */
+enum ice_status
+ice_alloc_fd_guar_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr)
+{
+	return ice_alloc_res_cntr(hw, ICE_AQC_RES_TYPE_FDIR_GUARANTEED_ENTRIES,
+				  ICE_AQC_RES_TYPE_FLAG_DEDICATED, num_fltr,
+				  cntr_id);
+}
+
+/**
+ * ice_alloc_fd_shrd_item - allocate resource for flow director shared entries
+ * @hw: pointer to the hardware structure
+ * @cntr_id: returns counter index
+ * @num_fltr: number of filter entries to be allocated
+ */
+enum ice_status
+ice_alloc_fd_shrd_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr)
+{
+	return ice_alloc_res_cntr(hw, ICE_AQC_RES_TYPE_FDIR_SHARED_ENTRIES,
+				  ICE_AQC_RES_TYPE_FLAG_DEDICATED, num_fltr,
+				  cntr_id);
+}
+
+/**
+ * ice_get_fdir_cnt_all - get the number of Flow Director filters
+ * @hw: hardware data structure
+ *
+ * Returns the number of filters available on device
+ */
+int ice_get_fdir_cnt_all(struct ice_hw *hw)
+{
+	return hw->func_caps.fd_fltr_guar + hw->func_caps.fd_fltr_best_effort;
+}
+
+/**
+ * ice_pkt_insert_ipv6_addr - insert a be32 IPv6 address into a memory buffer
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @addr: IPv6 address to convert and insert into pkt at offset
+ */
+static void ice_pkt_insert_ipv6_addr(u8 *pkt, int offset, __be32 *addr)
+{
+	int idx;
+
+	for (idx = 0; idx < ICE_IPV6_ADDR_LEN_AS_U32; idx++)
+		memcpy(pkt + offset + idx * sizeof(*addr), &addr[idx],
+		       sizeof(*addr));
+}
+
+/**
+ * ice_pkt_insert_u16 - insert a be16 value into a memory buffer
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 16 bit value to convert and insert into pkt at offset
+ */
+static void ice_pkt_insert_u16(u8 *pkt, int offset, __be16 data)
+{
+	memcpy(pkt + offset, &data, sizeof(data));
+}
+
+/**
+ * ice_pkt_insert_u32 - insert a be32 value into a memory buffer
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 32 bit value to convert and insert into pkt at offset
+ */
+static void ice_pkt_insert_u32(u8 *pkt, int offset, __be32 data)
+{
+	memcpy(pkt + offset, &data, sizeof(data));
+}
+
+/**
+ * ice_fdir_get_gen_prgm_pkt - generate a training packet
+ * @hw: pointer to the hardware structure
+ * @input: flow director filter data structure
+ * @pkt: pointer to return filter packet
+ * @frag: generate a fragment packet
+ * @tun: true implies generate a tunnel packet
+ */
+enum ice_status
+ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
+			  u8 *pkt, bool frag, bool tun)
+{
+	enum ice_fltr_ptype flow;
+	u16 tnl_port;
+	u8 *loc;
+	u16 idx;
+
+	if (input->flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER) {
+		switch (input->ip.v4.proto) {
+		case IPPROTO_TCP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV4_TCP;
+			break;
+		case IPPROTO_UDP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV4_UDP;
+			break;
+		case IPPROTO_SCTP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV4_SCTP;
+			break;
+		case IPPROTO_IP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV4_OTHER;
+			break;
+		default:
+			return ICE_ERR_PARAM;
+		}
+	} else if (input->flow_type == ICE_FLTR_PTYPE_NONF_IPV6_OTHER) {
+		switch (input->ip.v6.proto) {
+		case IPPROTO_TCP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV6_TCP;
+			break;
+		case IPPROTO_UDP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV6_UDP;
+			break;
+		case IPPROTO_SCTP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV6_SCTP;
+			break;
+		case IPPROTO_IP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV6_OTHER;
+			break;
+		default:
+			return ICE_ERR_PARAM;
+		}
+	} else {
+		flow = input->flow_type;
+	}
+
+	for (idx = 0; idx < ICE_FDIR_NUM_PKT; idx++)
+		if (ice_fdir_pkt[idx].flow == flow)
+			break;
+	if (idx == ICE_FDIR_NUM_PKT)
+		return ICE_ERR_PARAM;
+	if (!tun) {
+		memcpy(pkt, ice_fdir_pkt[idx].pkt, ice_fdir_pkt[idx].pkt_len);
+		loc = pkt;
+	} else {
+		if (!ice_get_open_tunnel_port(hw, &tnl_port))
+			return ICE_ERR_DOES_NOT_EXIST;
+		if (!ice_fdir_pkt[idx].tun_pkt)
+			return ICE_ERR_PARAM;
+		memcpy(pkt, ice_fdir_pkt[idx].tun_pkt,
+		       ice_fdir_pkt[idx].tun_pkt_len);
+		ice_pkt_insert_u16(pkt, ICE_IPV4_UDP_DST_PORT_OFFSET,
+				   htons(tnl_port));
+		loc = &pkt[ICE_FDIR_TUN_PKT_OFF];
+	}
+
+	/* Reverse the src and dst, since the HW expects them to be from Tx
+	 * perspective. The input from user is from Rx filter perspective.
+	 */
+	switch (flow) {
+	case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.v4.src_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_TCP_DST_PORT_OFFSET,
+				   input->ip.v4.src_port);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.v4.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_TCP_SRC_PORT_OFFSET,
+				   input->ip.v4.dst_port);
+		if (frag)
+			loc[20] = ICE_FDIR_IPV4_PKT_FLAG_DF;
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.v4.src_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_UDP_DST_PORT_OFFSET,
+				   input->ip.v4.src_port);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.v4.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_UDP_SRC_PORT_OFFSET,
+				   input->ip.v4.dst_port);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.v4.src_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_SCTP_DST_PORT_OFFSET,
+				   input->ip.v4.src_port);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.v4.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_SCTP_SRC_PORT_OFFSET,
+				   input->ip.v4.dst_port);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.v4.src_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.v4.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_PROTO_OFFSET, 0);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+					 input->ip.v6.src_ip);
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+					 input->ip.v6.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV6_TCP_DST_PORT_OFFSET,
+				   input->ip.v6.src_port);
+		ice_pkt_insert_u16(loc, ICE_IPV6_TCP_SRC_PORT_OFFSET,
+				   input->ip.v6.dst_port);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+					 input->ip.v6.src_ip);
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+					 input->ip.v6.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV6_UDP_DST_PORT_OFFSET,
+				   input->ip.v6.src_port);
+		ice_pkt_insert_u16(loc, ICE_IPV6_UDP_SRC_PORT_OFFSET,
+				   input->ip.v6.dst_port);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+					 input->ip.v6.src_ip);
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+					 input->ip.v6.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV6_SCTP_DST_PORT_OFFSET,
+				   input->ip.v6.src_port);
+		ice_pkt_insert_u16(loc, ICE_IPV6_SCTP_SRC_PORT_OFFSET,
+				   input->ip.v6.dst_port);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+					 input->ip.v6.src_ip);
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+					 input->ip.v6.dst_ip);
+		break;
+	default:
+		return ICE_ERR_PARAM;
+	}
+
+	if (input->flex_fltr)
+		ice_pkt_insert_u16(loc, input->flex_offset, input->flex_word);
+
+	return 0;
+}
+
+/**
+ * ice_fdir_has_frag - does flow type have 2 ptypes
+ * @flow: flow ptype
+ *
+ * returns true is there is a fragment packet for this ptype
+ */
+bool ice_fdir_has_frag(enum ice_fltr_ptype flow)
+{
+	if (flow == ICE_FLTR_PTYPE_NONF_IPV4_OTHER)
+		return true;
+	else
+		return false;
+}
+
+/**
+ * ice_fdir_find_by_idx - find filter with idx
+ * @hw: pointer to hardware structure
+ * @fltr_idx: index to find.
+ *
+ * Returns pointer to filter if found or null
+ */
+struct ice_fdir_fltr *
+ice_fdir_find_fltr_by_idx(struct ice_hw *hw, u32 fltr_idx)
+{
+	struct ice_fdir_fltr *rule;
+
+	list_for_each_entry(rule, &hw->fdir_list_head, fltr_node) {
+		/* rule ID found in the list */
+		if (fltr_idx == rule->fltr_id)
+			return rule;
+		if (fltr_idx < rule->fltr_id)
+			break;
+	}
+	return NULL;
+}
+
+/**
+ * ice_fdir_list_add_fltr - add a new node to the flow director filter list
+ * @hw: hardware structure
+ * @fltr: filter node to add to structure
+ */
+void ice_fdir_list_add_fltr(struct ice_hw *hw, struct ice_fdir_fltr *fltr)
+{
+	struct ice_fdir_fltr *rule, *parent = NULL;
+
+	list_for_each_entry(rule, &hw->fdir_list_head, fltr_node) {
+		/* rule ID found or pass its spot in the list */
+		if (rule->fltr_id >= fltr->fltr_id)
+			break;
+		parent = rule;
+	}
+
+	if (parent)
+		list_add(&fltr->fltr_node, &parent->fltr_node);
+	else
+		list_add(&fltr->fltr_node, &hw->fdir_list_head);
+}
+
+/**
+ * ice_fdir_update_cntrs - increment / decrement filter counter
+ * @hw: pointer to hardware structure
+ * @flow: filter flow type
+ * @add: true implies filters added
+ */
+void
+ice_fdir_update_cntrs(struct ice_hw *hw, enum ice_fltr_ptype flow, bool add)
+{
+	int incr;
+
+	incr = add ? 1 : -1;
+	hw->fdir_active_fltr += incr;
+
+	if (flow == ICE_FLTR_PTYPE_NONF_NONE || flow >= ICE_FLTR_PTYPE_MAX)
+		ice_debug(hw, ICE_DBG_SW, "Unknown filter type %d\n", flow);
+	else
+		hw->fdir_fltr_cnt[flow] += incr;
+}
+
+/**
+ * ice_cmp_ipv6_addr - compare 2 IP v6 addresses
+ * @a: IP v6 address
+ * @b: IP v6 address
+ *
+ * Returns 0 on equal, returns non-0 if different
+ */
+static int ice_cmp_ipv6_addr(__be32 *a, __be32 *b)
+{
+	return memcmp(a, b, 4 * sizeof(__be32));
+}
+
+/**
+ * ice_fdir_comp_rules - compare 2 filters
+ * @a: a Flow Director filter data structure
+ * @b: a Flow Director filter data structure
+ * @v6: bool true if v6 filter
+ *
+ * Returns true if the filters match
+ */
+static bool
+ice_fdir_comp_rules(struct ice_fdir_fltr *a,  struct ice_fdir_fltr *b, bool v6)
+{
+	enum ice_fltr_ptype flow_type = a->flow_type;
+
+	/* The calling function already checks that the two filters have the
+	 * same flow_type.
+	 */
+	if (!v6) {
+		if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_SCTP) {
+			if (a->ip.v4.dst_ip == b->ip.v4.dst_ip &&
+			    a->ip.v4.src_ip == b->ip.v4.src_ip &&
+			    a->ip.v4.dst_port == b->ip.v4.dst_port &&
+			    a->ip.v4.src_port == b->ip.v4.src_port)
+				return true;
+		} else if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER) {
+			if (a->ip.v4.dst_ip == b->ip.v4.dst_ip &&
+			    a->ip.v4.src_ip == b->ip.v4.src_ip &&
+			    a->ip.v4.l4_header == b->ip.v4.l4_header &&
+			    a->ip.v4.proto == b->ip.v4.proto &&
+			    a->ip.v4.ip_ver == b->ip.v4.ip_ver &&
+			    a->ip.v4.tos == b->ip.v4.tos)
+				return true;
+		}
+	} else {
+		if (flow_type == ICE_FLTR_PTYPE_NONF_IPV6_UDP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV6_TCP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV6_SCTP) {
+			if (a->ip.v6.dst_port == b->ip.v6.dst_port &&
+			    a->ip.v6.src_port == b->ip.v6.src_port &&
+			    !ice_cmp_ipv6_addr(a->ip.v6.dst_ip,
+					       b->ip.v6.dst_ip) &&
+			    !ice_cmp_ipv6_addr(a->ip.v6.src_ip,
+					       b->ip.v6.src_ip))
+				return true;
+		} else if (flow_type == ICE_FLTR_PTYPE_NONF_IPV6_OTHER) {
+			if (a->ip.v6.dst_port == b->ip.v6.dst_port &&
+			    a->ip.v6.src_port == b->ip.v6.src_port)
+				return true;
+		}
+	}
+
+	return false;
+}
+
+/**
+ * ice_fdir_is_dup_fltr - test if filter is already in list for PF
+ * @hw: hardware data structure
+ * @input: Flow Director filter data structure
+ *
+ * Returns true if the filter is found in the list
+ */
+bool ice_fdir_is_dup_fltr(struct ice_hw *hw, struct ice_fdir_fltr *input)
+{
+	struct ice_fdir_fltr *rule;
+	bool ret = false;
+
+	list_for_each_entry(rule, &hw->fdir_list_head, fltr_node) {
+		enum ice_fltr_ptype flow_type;
+
+		if (rule->flow_type != input->flow_type)
+			continue;
+
+		flow_type = input->flow_type;
+		if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_SCTP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER)
+			ret = ice_fdir_comp_rules(rule, input, false);
+		else
+			ret = ice_fdir_comp_rules(rule, input, true);
+		if (ret) {
+			if (rule->fltr_id == input->fltr_id &&
+			    rule->q_index != input->q_index)
+				ret = false;
+			else
+				break;
+		}
+	}
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h
new file mode 100644
index 000000000..1c587766d
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#ifndef _ICE_FDIR_H_
+#define _ICE_FDIR_H_
+
+#define ICE_FDIR_TUN_PKT_OFF		50
+#define ICE_FDIR_MAX_RAW_PKT_SIZE	(512 + ICE_FDIR_TUN_PKT_OFF)
+
+/* macros for offsets into packets for flow director programming */
+#define ICE_IPV4_SRC_ADDR_OFFSET	26
+#define ICE_IPV4_DST_ADDR_OFFSET	30
+#define ICE_IPV4_TCP_SRC_PORT_OFFSET	34
+#define ICE_IPV4_TCP_DST_PORT_OFFSET	36
+#define ICE_IPV4_UDP_SRC_PORT_OFFSET	34
+#define ICE_IPV4_UDP_DST_PORT_OFFSET	36
+#define ICE_IPV4_SCTP_SRC_PORT_OFFSET	34
+#define ICE_IPV4_SCTP_DST_PORT_OFFSET	36
+#define ICE_IPV4_PROTO_OFFSET		23
+#define ICE_IPV6_SRC_ADDR_OFFSET	22
+#define ICE_IPV6_DST_ADDR_OFFSET	38
+#define ICE_IPV6_TCP_SRC_PORT_OFFSET	54
+#define ICE_IPV6_TCP_DST_PORT_OFFSET	56
+#define ICE_IPV6_UDP_SRC_PORT_OFFSET	54
+#define ICE_IPV6_UDP_DST_PORT_OFFSET	56
+#define ICE_IPV6_SCTP_SRC_PORT_OFFSET	54
+#define ICE_IPV6_SCTP_DST_PORT_OFFSET	56
+/* IP v4 has 2 flag bits that enable fragment processing: DF and MF. DF
+ * requests that the packet not be fragmented. MF indicates that a packet has
+ * been fragmented.
+ */
+#define ICE_FDIR_IPV4_PKT_FLAG_DF		0x20
+
+enum ice_fltr_prgm_desc_dest {
+	ICE_FLTR_PRGM_DESC_DEST_DROP_PKT,
+	ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX,
+};
+
+enum ice_fltr_prgm_desc_fd_status {
+	ICE_FLTR_PRGM_DESC_FD_STATUS_NONE,
+	ICE_FLTR_PRGM_DESC_FD_STATUS_FD_ID,
+};
+
+/* Flow Director (FD) Filter Programming descriptor */
+struct ice_fd_fltr_desc_ctx {
+	u32 fdid;
+	u16 qindex;
+	u16 cnt_index;
+	u16 fd_vsi;
+	u16 flex_val;
+	u8 comp_q;
+	u8 comp_report;
+	u8 fd_space;
+	u8 cnt_ena;
+	u8 evict_ena;
+	u8 toq;
+	u8 toq_prio;
+	u8 dpu_recipe;
+	u8 drop;
+	u8 flex_prio;
+	u8 flex_mdid;
+	u8 dtype;
+	u8 pcmd;
+	u8 desc_prof_prio;
+	u8 desc_prof;
+	u8 swap;
+	u8 fdid_prio;
+	u8 fdid_mdid;
+};
+
+#define ICE_FLTR_PRGM_FLEX_WORD_SIZE	sizeof(__be16)
+
+struct ice_rx_flow_userdef {
+	u16 flex_word;
+	u16 flex_offset;
+	u16 flex_fltr;
+};
+
+struct ice_fdir_v4 {
+	__be32 dst_ip;
+	__be32 src_ip;
+	__be16 dst_port;
+	__be16 src_port;
+	__be32 l4_header;
+	__be32 sec_parm_idx;	/* security parameter index */
+	u8 tos;
+	u8 ip_ver;
+	u8 proto;
+};
+
+#define ICE_IPV6_ADDR_LEN_AS_U32		4
+
+struct ice_fdir_v6 {
+	__be32 dst_ip[ICE_IPV6_ADDR_LEN_AS_U32];
+	__be32 src_ip[ICE_IPV6_ADDR_LEN_AS_U32];
+	__be16 dst_port;
+	__be16 src_port;
+	__be32 l4_header; /* next header */
+	__be32 sec_parm_idx; /* security parameter index */
+	u8 tc;
+	u8 proto;
+};
+
+struct ice_fdir_extra {
+	u8 dst_mac[ETH_ALEN];	/* dest MAC address */
+	u32 usr_def[2];		/* user data */
+	__be16 vlan_type;	/* VLAN ethertype */
+	__be16 vlan_tag;	/* VLAN tag info */
+};
+
+struct ice_fdir_fltr {
+	struct list_head fltr_node;
+	enum ice_fltr_ptype flow_type;
+
+	union {
+		struct ice_fdir_v4 v4;
+		struct ice_fdir_v6 v6;
+	} ip, mask;
+
+	struct ice_fdir_extra ext_data;
+	struct ice_fdir_extra ext_mask;
+
+	/* flex byte filter data */
+	__be16 flex_word;
+	u16 flex_offset;
+	u16 flex_fltr;
+
+	/* filter control */
+	u16 q_index;
+	u16 dest_vsi;
+	u8 dest_ctl;
+	u8 fltr_status;
+	u16 cnt_index;
+	u32 fltr_id;
+};
+
+/* Dummy packet filter definition structure */
+struct ice_fdir_base_pkt {
+	enum ice_fltr_ptype flow;
+	u16 pkt_len;
+	const u8 *pkt;
+	u16 tun_pkt_len;
+	const u8 *tun_pkt;
+};
+
+enum ice_status ice_alloc_fd_res_cntr(struct ice_hw *hw, u16 *cntr_id);
+enum ice_status ice_free_fd_res_cntr(struct ice_hw *hw, u16 cntr_id);
+enum ice_status
+ice_alloc_fd_guar_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr);
+enum ice_status
+ice_alloc_fd_shrd_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr);
+void
+ice_fdir_get_prgm_desc(struct ice_hw *hw, struct ice_fdir_fltr *input,
+		       struct ice_fltr_desc *fdesc, bool add);
+enum ice_status
+ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
+			  u8 *pkt, bool frag, bool tun);
+int ice_get_fdir_cnt_all(struct ice_hw *hw);
+bool ice_fdir_is_dup_fltr(struct ice_hw *hw, struct ice_fdir_fltr *input);
+bool ice_fdir_has_frag(enum ice_fltr_ptype flow);
+struct ice_fdir_fltr *
+ice_fdir_find_fltr_by_idx(struct ice_hw *hw, u32 fltr_idx);
+void
+ice_fdir_update_cntrs(struct ice_hw *hw, enum ice_fltr_ptype flow, bool add);
+void ice_fdir_list_add_fltr(struct ice_hw *hw, struct ice_fdir_fltr *input);
+#endif /* _ICE_FDIR_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
new file mode 100644
index 000000000..a81be917f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -0,0 +1,4964 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_flex_pipe.h"
+#include "ice_flow.h"
+
+/* To support tunneling entries by PF, the package will append the PF number to
+ * the label; for example TNL_VXLAN_PF0, TNL_VXLAN_PF1, TNL_VXLAN_PF2, etc.
+ */
+static const struct ice_tunnel_type_scan tnls[] = {
+	{ TNL_VXLAN,		"TNL_VXLAN_PF" },
+	{ TNL_GENEVE,		"TNL_GENEVE_PF" },
+	{ TNL_LAST,		"" }
+};
+
+static const u32 ice_sect_lkup[ICE_BLK_COUNT][ICE_SECT_COUNT] = {
+	/* SWITCH */
+	{
+		ICE_SID_XLT0_SW,
+		ICE_SID_XLT_KEY_BUILDER_SW,
+		ICE_SID_XLT1_SW,
+		ICE_SID_XLT2_SW,
+		ICE_SID_PROFID_TCAM_SW,
+		ICE_SID_PROFID_REDIR_SW,
+		ICE_SID_FLD_VEC_SW,
+		ICE_SID_CDID_KEY_BUILDER_SW,
+		ICE_SID_CDID_REDIR_SW
+	},
+
+	/* ACL */
+	{
+		ICE_SID_XLT0_ACL,
+		ICE_SID_XLT_KEY_BUILDER_ACL,
+		ICE_SID_XLT1_ACL,
+		ICE_SID_XLT2_ACL,
+		ICE_SID_PROFID_TCAM_ACL,
+		ICE_SID_PROFID_REDIR_ACL,
+		ICE_SID_FLD_VEC_ACL,
+		ICE_SID_CDID_KEY_BUILDER_ACL,
+		ICE_SID_CDID_REDIR_ACL
+	},
+
+	/* FD */
+	{
+		ICE_SID_XLT0_FD,
+		ICE_SID_XLT_KEY_BUILDER_FD,
+		ICE_SID_XLT1_FD,
+		ICE_SID_XLT2_FD,
+		ICE_SID_PROFID_TCAM_FD,
+		ICE_SID_PROFID_REDIR_FD,
+		ICE_SID_FLD_VEC_FD,
+		ICE_SID_CDID_KEY_BUILDER_FD,
+		ICE_SID_CDID_REDIR_FD
+	},
+
+	/* RSS */
+	{
+		ICE_SID_XLT0_RSS,
+		ICE_SID_XLT_KEY_BUILDER_RSS,
+		ICE_SID_XLT1_RSS,
+		ICE_SID_XLT2_RSS,
+		ICE_SID_PROFID_TCAM_RSS,
+		ICE_SID_PROFID_REDIR_RSS,
+		ICE_SID_FLD_VEC_RSS,
+		ICE_SID_CDID_KEY_BUILDER_RSS,
+		ICE_SID_CDID_REDIR_RSS
+	},
+
+	/* PE */
+	{
+		ICE_SID_XLT0_PE,
+		ICE_SID_XLT_KEY_BUILDER_PE,
+		ICE_SID_XLT1_PE,
+		ICE_SID_XLT2_PE,
+		ICE_SID_PROFID_TCAM_PE,
+		ICE_SID_PROFID_REDIR_PE,
+		ICE_SID_FLD_VEC_PE,
+		ICE_SID_CDID_KEY_BUILDER_PE,
+		ICE_SID_CDID_REDIR_PE
+	}
+};
+
+/**
+ * ice_sect_id - returns section ID
+ * @blk: block type
+ * @sect: section type
+ *
+ * This helper function returns the proper section ID given a block type and a
+ * section type.
+ */
+static u32 ice_sect_id(enum ice_block blk, enum ice_sect sect)
+{
+	return ice_sect_lkup[blk][sect];
+}
+
+/**
+ * ice_pkg_val_buf
+ * @buf: pointer to the ice buffer
+ *
+ * This helper function validates a buffer's header.
+ */
+static struct ice_buf_hdr *ice_pkg_val_buf(struct ice_buf *buf)
+{
+	struct ice_buf_hdr *hdr;
+	u16 section_count;
+	u16 data_end;
+
+	hdr = (struct ice_buf_hdr *)buf->buf;
+	/* verify data */
+	section_count = le16_to_cpu(hdr->section_count);
+	if (section_count < ICE_MIN_S_COUNT || section_count > ICE_MAX_S_COUNT)
+		return NULL;
+
+	data_end = le16_to_cpu(hdr->data_end);
+	if (data_end < ICE_MIN_S_DATA_END || data_end > ICE_MAX_S_DATA_END)
+		return NULL;
+
+	return hdr;
+}
+
+/**
+ * ice_find_buf_table
+ * @ice_seg: pointer to the ice segment
+ *
+ * Returns the address of the buffer table within the ice segment.
+ */
+static struct ice_buf_table *ice_find_buf_table(struct ice_seg *ice_seg)
+{
+	struct ice_nvm_table *nvms;
+
+	nvms = (struct ice_nvm_table *)
+		(ice_seg->device_table +
+		 le32_to_cpu(ice_seg->device_table_count));
+
+	return (__force struct ice_buf_table *)
+		(nvms->vers + le32_to_cpu(nvms->table_count));
+}
+
+/**
+ * ice_pkg_enum_buf
+ * @ice_seg: pointer to the ice segment (or NULL on subsequent calls)
+ * @state: pointer to the enum state
+ *
+ * This function will enumerate all the buffers in the ice segment. The first
+ * call is made with the ice_seg parameter non-NULL; on subsequent calls,
+ * ice_seg is set to NULL which continues the enumeration. When the function
+ * returns a NULL pointer, then the end of the buffers has been reached, or an
+ * unexpected value has been detected (for example an invalid section count or
+ * an invalid buffer end value).
+ */
+static struct ice_buf_hdr *
+ice_pkg_enum_buf(struct ice_seg *ice_seg, struct ice_pkg_enum *state)
+{
+	if (ice_seg) {
+		state->buf_table = ice_find_buf_table(ice_seg);
+		if (!state->buf_table)
+			return NULL;
+
+		state->buf_idx = 0;
+		return ice_pkg_val_buf(state->buf_table->buf_array);
+	}
+
+	if (++state->buf_idx < le32_to_cpu(state->buf_table->buf_count))
+		return ice_pkg_val_buf(state->buf_table->buf_array +
+				       state->buf_idx);
+	else
+		return NULL;
+}
+
+/**
+ * ice_pkg_advance_sect
+ * @ice_seg: pointer to the ice segment (or NULL on subsequent calls)
+ * @state: pointer to the enum state
+ *
+ * This helper function will advance the section within the ice segment,
+ * also advancing the buffer if needed.
+ */
+static bool
+ice_pkg_advance_sect(struct ice_seg *ice_seg, struct ice_pkg_enum *state)
+{
+	if (!ice_seg && !state->buf)
+		return false;
+
+	if (!ice_seg && state->buf)
+		if (++state->sect_idx < le16_to_cpu(state->buf->section_count))
+			return true;
+
+	state->buf = ice_pkg_enum_buf(ice_seg, state);
+	if (!state->buf)
+		return false;
+
+	/* start of new buffer, reset section index */
+	state->sect_idx = 0;
+	return true;
+}
+
+/**
+ * ice_pkg_enum_section
+ * @ice_seg: pointer to the ice segment (or NULL on subsequent calls)
+ * @state: pointer to the enum state
+ * @sect_type: section type to enumerate
+ *
+ * This function will enumerate all the sections of a particular type in the
+ * ice segment. The first call is made with the ice_seg parameter non-NULL;
+ * on subsequent calls, ice_seg is set to NULL which continues the enumeration.
+ * When the function returns a NULL pointer, then the end of the matching
+ * sections has been reached.
+ */
+static void *
+ice_pkg_enum_section(struct ice_seg *ice_seg, struct ice_pkg_enum *state,
+		     u32 sect_type)
+{
+	u16 offset, size;
+
+	if (ice_seg)
+		state->type = sect_type;
+
+	if (!ice_pkg_advance_sect(ice_seg, state))
+		return NULL;
+
+	/* scan for next matching section */
+	while (state->buf->section_entry[state->sect_idx].type !=
+	       cpu_to_le32(state->type))
+		if (!ice_pkg_advance_sect(NULL, state))
+			return NULL;
+
+	/* validate section */
+	offset = le16_to_cpu(state->buf->section_entry[state->sect_idx].offset);
+	if (offset < ICE_MIN_S_OFF || offset > ICE_MAX_S_OFF)
+		return NULL;
+
+	size = le16_to_cpu(state->buf->section_entry[state->sect_idx].size);
+	if (size < ICE_MIN_S_SZ || size > ICE_MAX_S_SZ)
+		return NULL;
+
+	/* make sure the section fits in the buffer */
+	if (offset + size > ICE_PKG_BUF_SIZE)
+		return NULL;
+
+	state->sect_type =
+		le32_to_cpu(state->buf->section_entry[state->sect_idx].type);
+
+	/* calc pointer to this section */
+	state->sect = ((u8 *)state->buf) +
+		le16_to_cpu(state->buf->section_entry[state->sect_idx].offset);
+
+	return state->sect;
+}
+
+/**
+ * ice_pkg_enum_entry
+ * @ice_seg: pointer to the ice segment (or NULL on subsequent calls)
+ * @state: pointer to the enum state
+ * @sect_type: section type to enumerate
+ * @offset: pointer to variable that receives the offset in the table (optional)
+ * @handler: function that handles access to the entries into the section type
+ *
+ * This function will enumerate all the entries in particular section type in
+ * the ice segment. The first call is made with the ice_seg parameter non-NULL;
+ * on subsequent calls, ice_seg is set to NULL which continues the enumeration.
+ * When the function returns a NULL pointer, then the end of the entries has
+ * been reached.
+ *
+ * Since each section may have a different header and entry size, the handler
+ * function is needed to determine the number and location entries in each
+ * section.
+ *
+ * The offset parameter is optional, but should be used for sections that
+ * contain an offset for each section table. For such cases, the section handler
+ * function must return the appropriate offset + index to give the absolution
+ * offset for each entry. For example, if the base for a section's header
+ * indicates a base offset of 10, and the index for the entry is 2, then
+ * section handler function should set the offset to 10 + 2 = 12.
+ */
+static void *
+ice_pkg_enum_entry(struct ice_seg *ice_seg, struct ice_pkg_enum *state,
+		   u32 sect_type, u32 *offset,
+		   void *(*handler)(u32 sect_type, void *section,
+				    u32 index, u32 *offset))
+{
+	void *entry;
+
+	if (ice_seg) {
+		if (!handler)
+			return NULL;
+
+		if (!ice_pkg_enum_section(ice_seg, state, sect_type))
+			return NULL;
+
+		state->entry_idx = 0;
+		state->handler = handler;
+	} else {
+		state->entry_idx++;
+	}
+
+	if (!state->handler)
+		return NULL;
+
+	/* get entry */
+	entry = state->handler(state->sect_type, state->sect, state->entry_idx,
+			       offset);
+	if (!entry) {
+		/* end of a section, look for another section of this type */
+		if (!ice_pkg_enum_section(NULL, state, 0))
+			return NULL;
+
+		state->entry_idx = 0;
+		entry = state->handler(state->sect_type, state->sect,
+				       state->entry_idx, offset);
+	}
+
+	return entry;
+}
+
+/**
+ * ice_boost_tcam_handler
+ * @sect_type: section type
+ * @section: pointer to section
+ * @index: index of the boost TCAM entry to be returned
+ * @offset: pointer to receive absolute offset, always 0 for boost TCAM sections
+ *
+ * This is a callback function that can be passed to ice_pkg_enum_entry.
+ * Handles enumeration of individual boost TCAM entries.
+ */
+static void *
+ice_boost_tcam_handler(u32 sect_type, void *section, u32 index, u32 *offset)
+{
+	struct ice_boost_tcam_section *boost;
+
+	if (!section)
+		return NULL;
+
+	if (sect_type != ICE_SID_RXPARSER_BOOST_TCAM)
+		return NULL;
+
+	if (index > ICE_MAX_BST_TCAMS_IN_BUF)
+		return NULL;
+
+	if (offset)
+		*offset = 0;
+
+	boost = section;
+	if (index >= le16_to_cpu(boost->count))
+		return NULL;
+
+	return boost->tcam + index;
+}
+
+/**
+ * ice_find_boost_entry
+ * @ice_seg: pointer to the ice segment (non-NULL)
+ * @addr: Boost TCAM address of entry to search for
+ * @entry: returns pointer to the entry
+ *
+ * Finds a particular Boost TCAM entry and returns a pointer to that entry
+ * if it is found. The ice_seg parameter must not be NULL since the first call
+ * to ice_pkg_enum_entry requires a pointer to an actual ice_segment structure.
+ */
+static enum ice_status
+ice_find_boost_entry(struct ice_seg *ice_seg, u16 addr,
+		     struct ice_boost_tcam_entry **entry)
+{
+	struct ice_boost_tcam_entry *tcam;
+	struct ice_pkg_enum state;
+
+	memset(&state, 0, sizeof(state));
+
+	if (!ice_seg)
+		return ICE_ERR_PARAM;
+
+	do {
+		tcam = ice_pkg_enum_entry(ice_seg, &state,
+					  ICE_SID_RXPARSER_BOOST_TCAM, NULL,
+					  ice_boost_tcam_handler);
+		if (tcam && le16_to_cpu(tcam->addr) == addr) {
+			*entry = tcam;
+			return 0;
+		}
+
+		ice_seg = NULL;
+	} while (tcam);
+
+	*entry = NULL;
+	return ICE_ERR_CFG;
+}
+
+/**
+ * ice_label_enum_handler
+ * @sect_type: section type
+ * @section: pointer to section
+ * @index: index of the label entry to be returned
+ * @offset: pointer to receive absolute offset, always zero for label sections
+ *
+ * This is a callback function that can be passed to ice_pkg_enum_entry.
+ * Handles enumeration of individual label entries.
+ */
+static void *
+ice_label_enum_handler(u32 __always_unused sect_type, void *section, u32 index,
+		       u32 *offset)
+{
+	struct ice_label_section *labels;
+
+	if (!section)
+		return NULL;
+
+	if (index > ICE_MAX_LABELS_IN_BUF)
+		return NULL;
+
+	if (offset)
+		*offset = 0;
+
+	labels = section;
+	if (index >= le16_to_cpu(labels->count))
+		return NULL;
+
+	return labels->label + index;
+}
+
+/**
+ * ice_enum_labels
+ * @ice_seg: pointer to the ice segment (NULL on subsequent calls)
+ * @type: the section type that will contain the label (0 on subsequent calls)
+ * @state: ice_pkg_enum structure that will hold the state of the enumeration
+ * @value: pointer to a value that will return the label's value if found
+ *
+ * Enumerates a list of labels in the package. The caller will call
+ * ice_enum_labels(ice_seg, type, ...) to start the enumeration, then call
+ * ice_enum_labels(NULL, 0, ...) to continue. When the function returns a NULL
+ * the end of the list has been reached.
+ */
+static char *
+ice_enum_labels(struct ice_seg *ice_seg, u32 type, struct ice_pkg_enum *state,
+		u16 *value)
+{
+	struct ice_label *label;
+
+	/* Check for valid label section on first call */
+	if (type && !(type >= ICE_SID_LBL_FIRST && type <= ICE_SID_LBL_LAST))
+		return NULL;
+
+	label = ice_pkg_enum_entry(ice_seg, state, type, NULL,
+				   ice_label_enum_handler);
+	if (!label)
+		return NULL;
+
+	*value = le16_to_cpu(label->value);
+	return label->name;
+}
+
+/**
+ * ice_init_pkg_hints
+ * @hw: pointer to the HW structure
+ * @ice_seg: pointer to the segment of the package scan (non-NULL)
+ *
+ * This function will scan the package and save off relevant information
+ * (hints or metadata) for driver use. The ice_seg parameter must not be NULL
+ * since the first call to ice_enum_labels requires a pointer to an actual
+ * ice_seg structure.
+ */
+static void ice_init_pkg_hints(struct ice_hw *hw, struct ice_seg *ice_seg)
+{
+	struct ice_pkg_enum state;
+	char *label_name;
+	u16 val;
+	int i;
+
+	memset(&hw->tnl, 0, sizeof(hw->tnl));
+	memset(&state, 0, sizeof(state));
+
+	if (!ice_seg)
+		return;
+
+	label_name = ice_enum_labels(ice_seg, ICE_SID_LBL_RXPARSER_TMEM, &state,
+				     &val);
+
+	while (label_name && hw->tnl.count < ICE_TUNNEL_MAX_ENTRIES) {
+		for (i = 0; tnls[i].type != TNL_LAST; i++) {
+			size_t len = strlen(tnls[i].label_prefix);
+
+			/* Look for matching label start, before continuing */
+			if (strncmp(label_name, tnls[i].label_prefix, len))
+				continue;
+
+			/* Make sure this label matches our PF. Note that the PF
+			 * character ('0' - '7') will be located where our
+			 * prefix string's null terminator is located.
+			 */
+			if ((label_name[len] - '0') == hw->pf_id) {
+				hw->tnl.tbl[hw->tnl.count].type = tnls[i].type;
+				hw->tnl.tbl[hw->tnl.count].valid = false;
+				hw->tnl.tbl[hw->tnl.count].boost_addr = val;
+				hw->tnl.tbl[hw->tnl.count].port = 0;
+				hw->tnl.count++;
+				break;
+			}
+		}
+
+		label_name = ice_enum_labels(NULL, 0, &state, &val);
+	}
+
+	/* Cache the appropriate boost TCAM entry pointers */
+	for (i = 0; i < hw->tnl.count; i++) {
+		ice_find_boost_entry(ice_seg, hw->tnl.tbl[i].boost_addr,
+				     &hw->tnl.tbl[i].boost_entry);
+		if (hw->tnl.tbl[i].boost_entry) {
+			hw->tnl.tbl[i].valid = true;
+			if (hw->tnl.tbl[i].type < __TNL_TYPE_CNT)
+				hw->tnl.valid_count[hw->tnl.tbl[i].type]++;
+		}
+	}
+}
+
+/* Key creation */
+
+#define ICE_DC_KEY	0x1	/* don't care */
+#define ICE_DC_KEYINV	0x1
+#define ICE_NM_KEY	0x0	/* never match */
+#define ICE_NM_KEYINV	0x0
+#define ICE_0_KEY	0x1	/* match 0 */
+#define ICE_0_KEYINV	0x0
+#define ICE_1_KEY	0x0	/* match 1 */
+#define ICE_1_KEYINV	0x1
+
+/**
+ * ice_gen_key_word - generate 16-bits of a key/mask word
+ * @val: the value
+ * @valid: valid bits mask (change only the valid bits)
+ * @dont_care: don't care mask
+ * @nvr_mtch: never match mask
+ * @key: pointer to an array of where the resulting key portion
+ * @key_inv: pointer to an array of where the resulting key invert portion
+ *
+ * This function generates 16-bits from a 8-bit value, an 8-bit don't care mask
+ * and an 8-bit never match mask. The 16-bits of output are divided into 8 bits
+ * of key and 8 bits of key invert.
+ *
+ *     '0' =    b01, always match a 0 bit
+ *     '1' =    b10, always match a 1 bit
+ *     '?' =    b11, don't care bit (always matches)
+ *     '~' =    b00, never match bit
+ *
+ * Input:
+ *          val:         b0  1  0  1  0  1
+ *          dont_care:   b0  0  1  1  0  0
+ *          never_mtch:  b0  0  0  0  1  1
+ *          ------------------------------
+ * Result:  key:        b01 10 11 11 00 00
+ */
+static enum ice_status
+ice_gen_key_word(u8 val, u8 valid, u8 dont_care, u8 nvr_mtch, u8 *key,
+		 u8 *key_inv)
+{
+	u8 in_key = *key, in_key_inv = *key_inv;
+	u8 i;
+
+	/* 'dont_care' and 'nvr_mtch' masks cannot overlap */
+	if ((dont_care ^ nvr_mtch) != (dont_care | nvr_mtch))
+		return ICE_ERR_CFG;
+
+	*key = 0;
+	*key_inv = 0;
+
+	/* encode the 8 bits into 8-bit key and 8-bit key invert */
+	for (i = 0; i < 8; i++) {
+		*key >>= 1;
+		*key_inv >>= 1;
+
+		if (!(valid & 0x1)) { /* change only valid bits */
+			*key |= (in_key & 0x1) << 7;
+			*key_inv |= (in_key_inv & 0x1) << 7;
+		} else if (dont_care & 0x1) { /* don't care bit */
+			*key |= ICE_DC_KEY << 7;
+			*key_inv |= ICE_DC_KEYINV << 7;
+		} else if (nvr_mtch & 0x1) { /* never match bit */
+			*key |= ICE_NM_KEY << 7;
+			*key_inv |= ICE_NM_KEYINV << 7;
+		} else if (val & 0x01) { /* exact 1 match */
+			*key |= ICE_1_KEY << 7;
+			*key_inv |= ICE_1_KEYINV << 7;
+		} else { /* exact 0 match */
+			*key |= ICE_0_KEY << 7;
+			*key_inv |= ICE_0_KEYINV << 7;
+		}
+
+		dont_care >>= 1;
+		nvr_mtch >>= 1;
+		valid >>= 1;
+		val >>= 1;
+		in_key >>= 1;
+		in_key_inv >>= 1;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_bits_max_set - determine if the number of bits set is within a maximum
+ * @mask: pointer to the byte array which is the mask
+ * @size: the number of bytes in the mask
+ * @max: the max number of set bits
+ *
+ * This function determines if there are at most 'max' number of bits set in an
+ * array. Returns true if the number for bits set is <= max or will return false
+ * otherwise.
+ */
+static bool ice_bits_max_set(const u8 *mask, u16 size, u16 max)
+{
+	u16 count = 0;
+	u16 i;
+
+	/* check each byte */
+	for (i = 0; i < size; i++) {
+		/* if 0, go to next byte */
+		if (!mask[i])
+			continue;
+
+		/* We know there is at least one set bit in this byte because of
+		 * the above check; if we already have found 'max' number of
+		 * bits set, then we can return failure now.
+		 */
+		if (count == max)
+			return false;
+
+		/* count the bits in this byte, checking threshold */
+		count += hweight8(mask[i]);
+		if (count > max)
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * ice_set_key - generate a variable sized key with multiples of 16-bits
+ * @key: pointer to where the key will be stored
+ * @size: the size of the complete key in bytes (must be even)
+ * @val: array of 8-bit values that makes up the value portion of the key
+ * @upd: array of 8-bit masks that determine what key portion to update
+ * @dc: array of 8-bit masks that make up the don't care mask
+ * @nm: array of 8-bit masks that make up the never match mask
+ * @off: the offset of the first byte in the key to update
+ * @len: the number of bytes in the key update
+ *
+ * This function generates a key from a value, a don't care mask and a never
+ * match mask.
+ * upd, dc, and nm are optional parameters, and can be NULL:
+ *	upd == NULL --> upd mask is all 1's (update all bits)
+ *	dc == NULL --> dc mask is all 0's (no don't care bits)
+ *	nm == NULL --> nm mask is all 0's (no never match bits)
+ */
+static enum ice_status
+ice_set_key(u8 *key, u16 size, u8 *val, u8 *upd, u8 *dc, u8 *nm, u16 off,
+	    u16 len)
+{
+	u16 half_size;
+	u16 i;
+
+	/* size must be a multiple of 2 bytes. */
+	if (size % 2)
+		return ICE_ERR_CFG;
+
+	half_size = size / 2;
+	if (off + len > half_size)
+		return ICE_ERR_CFG;
+
+	/* Make sure at most one bit is set in the never match mask. Having more
+	 * than one never match mask bit set will cause HW to consume excessive
+	 * power otherwise; this is a power management efficiency check.
+	 */
+#define ICE_NVR_MTCH_BITS_MAX	1
+	if (nm && !ice_bits_max_set(nm, len, ICE_NVR_MTCH_BITS_MAX))
+		return ICE_ERR_CFG;
+
+	for (i = 0; i < len; i++)
+		if (ice_gen_key_word(val[i], upd ? upd[i] : 0xff,
+				     dc ? dc[i] : 0, nm ? nm[i] : 0,
+				     key + off + i, key + half_size + off + i))
+			return ICE_ERR_CFG;
+
+	return 0;
+}
+
+/**
+ * ice_acquire_global_cfg_lock
+ * @hw: pointer to the HW structure
+ * @access: access type (read or write)
+ *
+ * This function will request ownership of the global config lock for reading
+ * or writing of the package. When attempting to obtain write access, the
+ * caller must check for the following two return values:
+ *
+ * ICE_SUCCESS        - Means the caller has acquired the global config lock
+ *                      and can perform writing of the package.
+ * ICE_ERR_AQ_NO_WORK - Indicates another driver has already written the
+ *                      package or has found that no update was necessary; in
+ *                      this case, the caller can just skip performing any
+ *                      update of the package.
+ */
+static enum ice_status
+ice_acquire_global_cfg_lock(struct ice_hw *hw,
+			    enum ice_aq_res_access_type access)
+{
+	enum ice_status status;
+
+	status = ice_acquire_res(hw, ICE_GLOBAL_CFG_LOCK_RES_ID, access,
+				 ICE_GLOBAL_CFG_LOCK_TIMEOUT);
+
+	if (!status)
+		mutex_lock(&ice_global_cfg_lock_sw);
+	else if (status == ICE_ERR_AQ_NO_WORK)
+		ice_debug(hw, ICE_DBG_PKG,
+			  "Global config lock: No work to do\n");
+
+	return status;
+}
+
+/**
+ * ice_release_global_cfg_lock
+ * @hw: pointer to the HW structure
+ *
+ * This function will release the global config lock.
+ */
+static void ice_release_global_cfg_lock(struct ice_hw *hw)
+{
+	mutex_unlock(&ice_global_cfg_lock_sw);
+	ice_release_res(hw, ICE_GLOBAL_CFG_LOCK_RES_ID);
+}
+
+/**
+ * ice_acquire_change_lock
+ * @hw: pointer to the HW structure
+ * @access: access type (read or write)
+ *
+ * This function will request ownership of the change lock.
+ */
+static enum ice_status
+ice_acquire_change_lock(struct ice_hw *hw, enum ice_aq_res_access_type access)
+{
+	return ice_acquire_res(hw, ICE_CHANGE_LOCK_RES_ID, access,
+			       ICE_CHANGE_LOCK_TIMEOUT);
+}
+
+/**
+ * ice_release_change_lock
+ * @hw: pointer to the HW structure
+ *
+ * This function will release the change lock using the proper Admin Command.
+ */
+static void ice_release_change_lock(struct ice_hw *hw)
+{
+	ice_release_res(hw, ICE_CHANGE_LOCK_RES_ID);
+}
+
+/**
+ * ice_aq_download_pkg
+ * @hw: pointer to the hardware structure
+ * @pkg_buf: the package buffer to transfer
+ * @buf_size: the size of the package buffer
+ * @last_buf: last buffer indicator
+ * @error_offset: returns error offset
+ * @error_info: returns error information
+ * @cd: pointer to command details structure or NULL
+ *
+ * Download Package (0x0C40)
+ */
+static enum ice_status
+ice_aq_download_pkg(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf,
+		    u16 buf_size, bool last_buf, u32 *error_offset,
+		    u32 *error_info, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_download_pkg *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	if (error_offset)
+		*error_offset = 0;
+	if (error_info)
+		*error_info = 0;
+
+	cmd = &desc.params.download_pkg;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_download_pkg);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	if (last_buf)
+		cmd->flags |= ICE_AQC_DOWNLOAD_PKG_LAST_BUF;
+
+	status = ice_aq_send_cmd(hw, &desc, pkg_buf, buf_size, cd);
+	if (status == ICE_ERR_AQ_ERROR) {
+		/* Read error from buffer only when the FW returned an error */
+		struct ice_aqc_download_pkg_resp *resp;
+
+		resp = (struct ice_aqc_download_pkg_resp *)pkg_buf;
+		if (error_offset)
+			*error_offset = le32_to_cpu(resp->error_offset);
+		if (error_info)
+			*error_info = le32_to_cpu(resp->error_info);
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_update_pkg
+ * @hw: pointer to the hardware structure
+ * @pkg_buf: the package cmd buffer
+ * @buf_size: the size of the package cmd buffer
+ * @last_buf: last buffer indicator
+ * @error_offset: returns error offset
+ * @error_info: returns error information
+ * @cd: pointer to command details structure or NULL
+ *
+ * Update Package (0x0C42)
+ */
+static enum ice_status
+ice_aq_update_pkg(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf, u16 buf_size,
+		  bool last_buf, u32 *error_offset, u32 *error_info,
+		  struct ice_sq_cd *cd)
+{
+	struct ice_aqc_download_pkg *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	if (error_offset)
+		*error_offset = 0;
+	if (error_info)
+		*error_info = 0;
+
+	cmd = &desc.params.download_pkg;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_update_pkg);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	if (last_buf)
+		cmd->flags |= ICE_AQC_DOWNLOAD_PKG_LAST_BUF;
+
+	status = ice_aq_send_cmd(hw, &desc, pkg_buf, buf_size, cd);
+	if (status == ICE_ERR_AQ_ERROR) {
+		/* Read error from buffer only when the FW returned an error */
+		struct ice_aqc_download_pkg_resp *resp;
+
+		resp = (struct ice_aqc_download_pkg_resp *)pkg_buf;
+		if (error_offset)
+			*error_offset = le32_to_cpu(resp->error_offset);
+		if (error_info)
+			*error_info = le32_to_cpu(resp->error_info);
+	}
+
+	return status;
+}
+
+/**
+ * ice_find_seg_in_pkg
+ * @hw: pointer to the hardware structure
+ * @seg_type: the segment type to search for (i.e., SEGMENT_TYPE_CPK)
+ * @pkg_hdr: pointer to the package header to be searched
+ *
+ * This function searches a package file for a particular segment type. On
+ * success it returns a pointer to the segment header, otherwise it will
+ * return NULL.
+ */
+static struct ice_generic_seg_hdr *
+ice_find_seg_in_pkg(struct ice_hw *hw, u32 seg_type,
+		    struct ice_pkg_hdr *pkg_hdr)
+{
+	u32 i;
+
+	ice_debug(hw, ICE_DBG_PKG, "Package format version: %d.%d.%d.%d\n",
+		  pkg_hdr->pkg_format_ver.major, pkg_hdr->pkg_format_ver.minor,
+		  pkg_hdr->pkg_format_ver.update,
+		  pkg_hdr->pkg_format_ver.draft);
+
+	/* Search all package segments for the requested segment type */
+	for (i = 0; i < le32_to_cpu(pkg_hdr->seg_count); i++) {
+		struct ice_generic_seg_hdr *seg;
+
+		seg = (struct ice_generic_seg_hdr *)
+			((u8 *)pkg_hdr + le32_to_cpu(pkg_hdr->seg_offset[i]));
+
+		if (le32_to_cpu(seg->seg_type) == seg_type)
+			return seg;
+	}
+
+	return NULL;
+}
+
+/**
+ * ice_update_pkg
+ * @hw: pointer to the hardware structure
+ * @bufs: pointer to an array of buffers
+ * @count: the number of buffers in the array
+ *
+ * Obtains change lock and updates package.
+ */
+static enum ice_status
+ice_update_pkg(struct ice_hw *hw, struct ice_buf *bufs, u32 count)
+{
+	enum ice_status status;
+	u32 offset, info, i;
+
+	status = ice_acquire_change_lock(hw, ICE_RES_WRITE);
+	if (status)
+		return status;
+
+	for (i = 0; i < count; i++) {
+		struct ice_buf_hdr *bh = (struct ice_buf_hdr *)(bufs + i);
+		bool last = ((i + 1) == count);
+
+		status = ice_aq_update_pkg(hw, bh, le16_to_cpu(bh->data_end),
+					   last, &offset, &info, NULL);
+
+		if (status) {
+			ice_debug(hw, ICE_DBG_PKG,
+				  "Update pkg failed: err %d off %d inf %d\n",
+				  status, offset, info);
+			break;
+		}
+	}
+
+	ice_release_change_lock(hw);
+
+	return status;
+}
+
+/**
+ * ice_dwnld_cfg_bufs
+ * @hw: pointer to the hardware structure
+ * @bufs: pointer to an array of buffers
+ * @count: the number of buffers in the array
+ *
+ * Obtains global config lock and downloads the package configuration buffers
+ * to the firmware. Metadata buffers are skipped, and the first metadata buffer
+ * found indicates that the rest of the buffers are all metadata buffers.
+ */
+static enum ice_status
+ice_dwnld_cfg_bufs(struct ice_hw *hw, struct ice_buf *bufs, u32 count)
+{
+	enum ice_status status;
+	struct ice_buf_hdr *bh;
+	u32 offset, info, i;
+
+	if (!bufs || !count)
+		return ICE_ERR_PARAM;
+
+	/* If the first buffer's first section has its metadata bit set
+	 * then there are no buffers to be downloaded, and the operation is
+	 * considered a success.
+	 */
+	bh = (struct ice_buf_hdr *)bufs;
+	if (le32_to_cpu(bh->section_entry[0].type) & ICE_METADATA_BUF)
+		return 0;
+
+	/* reset pkg_dwnld_status in case this function is called in the
+	 * reset/rebuild flow
+	 */
+	hw->pkg_dwnld_status = ICE_AQ_RC_OK;
+
+	status = ice_acquire_global_cfg_lock(hw, ICE_RES_WRITE);
+	if (status) {
+		if (status == ICE_ERR_AQ_NO_WORK)
+			hw->pkg_dwnld_status = ICE_AQ_RC_EEXIST;
+		else
+			hw->pkg_dwnld_status = hw->adminq.sq_last_status;
+		return status;
+	}
+
+	for (i = 0; i < count; i++) {
+		bool last = ((i + 1) == count);
+
+		if (!last) {
+			/* check next buffer for metadata flag */
+			bh = (struct ice_buf_hdr *)(bufs + i + 1);
+
+			/* A set metadata flag in the next buffer will signal
+			 * that the current buffer will be the last buffer
+			 * downloaded
+			 */
+			if (le16_to_cpu(bh->section_count))
+				if (le32_to_cpu(bh->section_entry[0].type) &
+				    ICE_METADATA_BUF)
+					last = true;
+		}
+
+		bh = (struct ice_buf_hdr *)(bufs + i);
+
+		status = ice_aq_download_pkg(hw, bh, ICE_PKG_BUF_SIZE, last,
+					     &offset, &info, NULL);
+
+		/* Save AQ status from download package */
+		hw->pkg_dwnld_status = hw->adminq.sq_last_status;
+		if (status) {
+			ice_debug(hw, ICE_DBG_PKG,
+				  "Pkg download failed: err %d off %d inf %d\n",
+				  status, offset, info);
+
+			break;
+		}
+
+		if (last)
+			break;
+	}
+
+	ice_release_global_cfg_lock(hw);
+
+	return status;
+}
+
+/**
+ * ice_aq_get_pkg_info_list
+ * @hw: pointer to the hardware structure
+ * @pkg_info: the buffer which will receive the information list
+ * @buf_size: the size of the pkg_info information buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get Package Info List (0x0C43)
+ */
+static enum ice_status
+ice_aq_get_pkg_info_list(struct ice_hw *hw,
+			 struct ice_aqc_get_pkg_info_resp *pkg_info,
+			 u16 buf_size, struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_pkg_info_list);
+
+	return ice_aq_send_cmd(hw, &desc, pkg_info, buf_size, cd);
+}
+
+/**
+ * ice_download_pkg
+ * @hw: pointer to the hardware structure
+ * @ice_seg: pointer to the segment of the package to be downloaded
+ *
+ * Handles the download of a complete package.
+ */
+static enum ice_status
+ice_download_pkg(struct ice_hw *hw, struct ice_seg *ice_seg)
+{
+	struct ice_buf_table *ice_buf_tbl;
+
+	ice_debug(hw, ICE_DBG_PKG, "Segment format version: %d.%d.%d.%d\n",
+		  ice_seg->hdr.seg_format_ver.major,
+		  ice_seg->hdr.seg_format_ver.minor,
+		  ice_seg->hdr.seg_format_ver.update,
+		  ice_seg->hdr.seg_format_ver.draft);
+
+	ice_debug(hw, ICE_DBG_PKG, "Seg: type 0x%X, size %d, name %s\n",
+		  le32_to_cpu(ice_seg->hdr.seg_type),
+		  le32_to_cpu(ice_seg->hdr.seg_size), ice_seg->hdr.seg_id);
+
+	ice_buf_tbl = ice_find_buf_table(ice_seg);
+
+	ice_debug(hw, ICE_DBG_PKG, "Seg buf count: %d\n",
+		  le32_to_cpu(ice_buf_tbl->buf_count));
+
+	return ice_dwnld_cfg_bufs(hw, ice_buf_tbl->buf_array,
+				  le32_to_cpu(ice_buf_tbl->buf_count));
+}
+
+/**
+ * ice_init_pkg_info
+ * @hw: pointer to the hardware structure
+ * @pkg_hdr: pointer to the driver's package hdr
+ *
+ * Saves off the package details into the HW structure.
+ */
+static enum ice_status
+ice_init_pkg_info(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr)
+{
+	struct ice_global_metadata_seg *meta_seg;
+	struct ice_generic_seg_hdr *seg_hdr;
+
+	if (!pkg_hdr)
+		return ICE_ERR_PARAM;
+
+	meta_seg = (struct ice_global_metadata_seg *)
+		   ice_find_seg_in_pkg(hw, SEGMENT_TYPE_METADATA, pkg_hdr);
+	if (meta_seg) {
+		hw->pkg_ver = meta_seg->pkg_ver;
+		memcpy(hw->pkg_name, meta_seg->pkg_name, sizeof(hw->pkg_name));
+
+		ice_debug(hw, ICE_DBG_PKG, "Pkg: %d.%d.%d.%d, %s\n",
+			  meta_seg->pkg_ver.major, meta_seg->pkg_ver.minor,
+			  meta_seg->pkg_ver.update, meta_seg->pkg_ver.draft,
+			  meta_seg->pkg_name);
+	} else {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Did not find metadata segment in driver package\n");
+		return ICE_ERR_CFG;
+	}
+
+	seg_hdr = ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE, pkg_hdr);
+	if (seg_hdr) {
+		hw->ice_pkg_ver = seg_hdr->seg_format_ver;
+		memcpy(hw->ice_pkg_name, seg_hdr->seg_id,
+		       sizeof(hw->ice_pkg_name));
+
+		ice_debug(hw, ICE_DBG_PKG, "Ice Seg: %d.%d.%d.%d, %s\n",
+			  seg_hdr->seg_format_ver.major,
+			  seg_hdr->seg_format_ver.minor,
+			  seg_hdr->seg_format_ver.update,
+			  seg_hdr->seg_format_ver.draft,
+			  seg_hdr->seg_id);
+	} else {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Did not find ice segment in driver package\n");
+		return ICE_ERR_CFG;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_pkg_info
+ * @hw: pointer to the hardware structure
+ *
+ * Store details of the package currently loaded in HW into the HW structure.
+ */
+static enum ice_status ice_get_pkg_info(struct ice_hw *hw)
+{
+	struct ice_aqc_get_pkg_info_resp *pkg_info;
+	enum ice_status status;
+	u16 size;
+	u32 i;
+
+	size = struct_size(pkg_info, pkg_info, ICE_PKG_CNT);
+	pkg_info = kzalloc(size, GFP_KERNEL);
+	if (!pkg_info)
+		return ICE_ERR_NO_MEMORY;
+
+	status = ice_aq_get_pkg_info_list(hw, pkg_info, size, NULL);
+	if (status)
+		goto init_pkg_free_alloc;
+
+	for (i = 0; i < le32_to_cpu(pkg_info->count); i++) {
+#define ICE_PKG_FLAG_COUNT	4
+		char flags[ICE_PKG_FLAG_COUNT + 1] = { 0 };
+		u8 place = 0;
+
+		if (pkg_info->pkg_info[i].is_active) {
+			flags[place++] = 'A';
+			hw->active_pkg_ver = pkg_info->pkg_info[i].ver;
+			hw->active_track_id =
+				le32_to_cpu(pkg_info->pkg_info[i].track_id);
+			memcpy(hw->active_pkg_name,
+			       pkg_info->pkg_info[i].name,
+			       sizeof(pkg_info->pkg_info[i].name));
+			hw->active_pkg_in_nvm = pkg_info->pkg_info[i].is_in_nvm;
+		}
+		if (pkg_info->pkg_info[i].is_active_at_boot)
+			flags[place++] = 'B';
+		if (pkg_info->pkg_info[i].is_modified)
+			flags[place++] = 'M';
+		if (pkg_info->pkg_info[i].is_in_nvm)
+			flags[place++] = 'N';
+
+		ice_debug(hw, ICE_DBG_PKG, "Pkg[%d]: %d.%d.%d.%d,%s,%s\n",
+			  i, pkg_info->pkg_info[i].ver.major,
+			  pkg_info->pkg_info[i].ver.minor,
+			  pkg_info->pkg_info[i].ver.update,
+			  pkg_info->pkg_info[i].ver.draft,
+			  pkg_info->pkg_info[i].name, flags);
+	}
+
+init_pkg_free_alloc:
+	kfree(pkg_info);
+
+	return status;
+}
+
+/**
+ * ice_verify_pkg - verify package
+ * @pkg: pointer to the package buffer
+ * @len: size of the package buffer
+ *
+ * Verifies various attributes of the package file, including length, format
+ * version, and the requirement of at least one segment.
+ */
+static enum ice_status ice_verify_pkg(struct ice_pkg_hdr *pkg, u32 len)
+{
+	u32 seg_count;
+	u32 i;
+
+	if (len < struct_size(pkg, seg_offset, 1))
+		return ICE_ERR_BUF_TOO_SHORT;
+
+	if (pkg->pkg_format_ver.major != ICE_PKG_FMT_VER_MAJ ||
+	    pkg->pkg_format_ver.minor != ICE_PKG_FMT_VER_MNR ||
+	    pkg->pkg_format_ver.update != ICE_PKG_FMT_VER_UPD ||
+	    pkg->pkg_format_ver.draft != ICE_PKG_FMT_VER_DFT)
+		return ICE_ERR_CFG;
+
+	/* pkg must have at least one segment */
+	seg_count = le32_to_cpu(pkg->seg_count);
+	if (seg_count < 1)
+		return ICE_ERR_CFG;
+
+	/* make sure segment array fits in package length */
+	if (len < struct_size(pkg, seg_offset, seg_count))
+		return ICE_ERR_BUF_TOO_SHORT;
+
+	/* all segments must fit within length */
+	for (i = 0; i < seg_count; i++) {
+		u32 off = le32_to_cpu(pkg->seg_offset[i]);
+		struct ice_generic_seg_hdr *seg;
+
+		/* segment header must fit */
+		if (len < off + sizeof(*seg))
+			return ICE_ERR_BUF_TOO_SHORT;
+
+		seg = (struct ice_generic_seg_hdr *)((u8 *)pkg + off);
+
+		/* segment body must fit */
+		if (len < off + le32_to_cpu(seg->seg_size))
+			return ICE_ERR_BUF_TOO_SHORT;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_free_seg - free package segment pointer
+ * @hw: pointer to the hardware structure
+ *
+ * Frees the package segment pointer in the proper manner, depending on if the
+ * segment was allocated or just the passed in pointer was stored.
+ */
+void ice_free_seg(struct ice_hw *hw)
+{
+	if (hw->pkg_copy) {
+		devm_kfree(ice_hw_to_dev(hw), hw->pkg_copy);
+		hw->pkg_copy = NULL;
+		hw->pkg_size = 0;
+	}
+	hw->seg = NULL;
+}
+
+/**
+ * ice_init_pkg_regs - initialize additional package registers
+ * @hw: pointer to the hardware structure
+ */
+static void ice_init_pkg_regs(struct ice_hw *hw)
+{
+#define ICE_SW_BLK_INP_MASK_L 0xFFFFFFFF
+#define ICE_SW_BLK_INP_MASK_H 0x0000FFFF
+#define ICE_SW_BLK_IDX	0
+
+	/* setup Switch block input mask, which is 48-bits in two parts */
+	wr32(hw, GL_PREEXT_L2_PMASK0(ICE_SW_BLK_IDX), ICE_SW_BLK_INP_MASK_L);
+	wr32(hw, GL_PREEXT_L2_PMASK1(ICE_SW_BLK_IDX), ICE_SW_BLK_INP_MASK_H);
+}
+
+/**
+ * ice_chk_pkg_version - check package version for compatibility with driver
+ * @pkg_ver: pointer to a version structure to check
+ *
+ * Check to make sure that the package about to be downloaded is compatible with
+ * the driver. To be compatible, the major and minor components of the package
+ * version must match our ICE_PKG_SUPP_VER_MAJ and ICE_PKG_SUPP_VER_MNR
+ * definitions.
+ */
+static enum ice_status ice_chk_pkg_version(struct ice_pkg_ver *pkg_ver)
+{
+	if (pkg_ver->major != ICE_PKG_SUPP_VER_MAJ ||
+	    pkg_ver->minor != ICE_PKG_SUPP_VER_MNR)
+		return ICE_ERR_NOT_SUPPORTED;
+
+	return 0;
+}
+
+/**
+ * ice_chk_pkg_compat
+ * @hw: pointer to the hardware structure
+ * @ospkg: pointer to the package hdr
+ * @seg: pointer to the package segment hdr
+ *
+ * This function checks the package version compatibility with driver and NVM
+ */
+static enum ice_status
+ice_chk_pkg_compat(struct ice_hw *hw, struct ice_pkg_hdr *ospkg,
+		   struct ice_seg **seg)
+{
+	struct ice_aqc_get_pkg_info_resp *pkg;
+	enum ice_status status;
+	u16 size;
+	u32 i;
+
+	/* Check package version compatibility */
+	status = ice_chk_pkg_version(&hw->pkg_ver);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Package version check failed.\n");
+		return status;
+	}
+
+	/* find ICE segment in given package */
+	*seg = (struct ice_seg *)ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE,
+						     ospkg);
+	if (!*seg) {
+		ice_debug(hw, ICE_DBG_INIT, "no ice segment in package.\n");
+		return ICE_ERR_CFG;
+	}
+
+	/* Check if FW is compatible with the OS package */
+	size = struct_size(pkg, pkg_info, ICE_PKG_CNT);
+	pkg = kzalloc(size, GFP_KERNEL);
+	if (!pkg)
+		return ICE_ERR_NO_MEMORY;
+
+	status = ice_aq_get_pkg_info_list(hw, pkg, size, NULL);
+	if (status)
+		goto fw_ddp_compat_free_alloc;
+
+	for (i = 0; i < le32_to_cpu(pkg->count); i++) {
+		/* loop till we find the NVM package */
+		if (!pkg->pkg_info[i].is_in_nvm)
+			continue;
+		if ((*seg)->hdr.seg_format_ver.major !=
+			pkg->pkg_info[i].ver.major ||
+		    (*seg)->hdr.seg_format_ver.minor >
+			pkg->pkg_info[i].ver.minor) {
+			status = ICE_ERR_FW_DDP_MISMATCH;
+			ice_debug(hw, ICE_DBG_INIT,
+				  "OS package is not compatible with NVM.\n");
+		}
+		/* done processing NVM package so break */
+		break;
+	}
+fw_ddp_compat_free_alloc:
+	kfree(pkg);
+	return status;
+}
+
+/**
+ * ice_init_pkg - initialize/download package
+ * @hw: pointer to the hardware structure
+ * @buf: pointer to the package buffer
+ * @len: size of the package buffer
+ *
+ * This function initializes a package. The package contains HW tables
+ * required to do packet processing. First, the function extracts package
+ * information such as version. Then it finds the ice configuration segment
+ * within the package; this function then saves a copy of the segment pointer
+ * within the supplied package buffer. Next, the function will cache any hints
+ * from the package, followed by downloading the package itself. Note, that if
+ * a previous PF driver has already downloaded the package successfully, then
+ * the current driver will not have to download the package again.
+ *
+ * The local package contents will be used to query default behavior and to
+ * update specific sections of the HW's version of the package (e.g. to update
+ * the parse graph to understand new protocols).
+ *
+ * This function stores a pointer to the package buffer memory, and it is
+ * expected that the supplied buffer will not be freed immediately. If the
+ * package buffer needs to be freed, such as when read from a file, use
+ * ice_copy_and_init_pkg() instead of directly calling ice_init_pkg() in this
+ * case.
+ */
+enum ice_status ice_init_pkg(struct ice_hw *hw, u8 *buf, u32 len)
+{
+	struct ice_pkg_hdr *pkg;
+	enum ice_status status;
+	struct ice_seg *seg;
+
+	if (!buf || !len)
+		return ICE_ERR_PARAM;
+
+	pkg = (struct ice_pkg_hdr *)buf;
+	status = ice_verify_pkg(pkg, len);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "failed to verify pkg (err: %d)\n",
+			  status);
+		return status;
+	}
+
+	/* initialize package info */
+	status = ice_init_pkg_info(hw, pkg);
+	if (status)
+		return status;
+
+	/* before downloading the package, check package version for
+	 * compatibility with driver
+	 */
+	status = ice_chk_pkg_compat(hw, pkg, &seg);
+	if (status)
+		return status;
+
+	/* initialize package hints and then download package */
+	ice_init_pkg_hints(hw, seg);
+	status = ice_download_pkg(hw, seg);
+	if (status == ICE_ERR_AQ_NO_WORK) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "package previously loaded - no work.\n");
+		status = 0;
+	}
+
+	/* Get information on the package currently loaded in HW, then make sure
+	 * the driver is compatible with this version.
+	 */
+	if (!status) {
+		status = ice_get_pkg_info(hw);
+		if (!status)
+			status = ice_chk_pkg_version(&hw->active_pkg_ver);
+	}
+
+	if (!status) {
+		hw->seg = seg;
+		/* on successful package download update other required
+		 * registers to support the package and fill HW tables
+		 * with package content.
+		 */
+		ice_init_pkg_regs(hw);
+		ice_fill_blk_tbls(hw);
+	} else {
+		ice_debug(hw, ICE_DBG_INIT, "package load failed, %d\n",
+			  status);
+	}
+
+	return status;
+}
+
+/**
+ * ice_copy_and_init_pkg - initialize/download a copy of the package
+ * @hw: pointer to the hardware structure
+ * @buf: pointer to the package buffer
+ * @len: size of the package buffer
+ *
+ * This function copies the package buffer, and then calls ice_init_pkg() to
+ * initialize the copied package contents.
+ *
+ * The copying is necessary if the package buffer supplied is constant, or if
+ * the memory may disappear shortly after calling this function.
+ *
+ * If the package buffer resides in the data segment and can be modified, the
+ * caller is free to use ice_init_pkg() instead of ice_copy_and_init_pkg().
+ *
+ * However, if the package buffer needs to be copied first, such as when being
+ * read from a file, the caller should use ice_copy_and_init_pkg().
+ *
+ * This function will first copy the package buffer, before calling
+ * ice_init_pkg(). The caller is free to immediately destroy the original
+ * package buffer, as the new copy will be managed by this function and
+ * related routines.
+ */
+enum ice_status ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, u32 len)
+{
+	enum ice_status status;
+	u8 *buf_copy;
+
+	if (!buf || !len)
+		return ICE_ERR_PARAM;
+
+	buf_copy = devm_kmemdup(ice_hw_to_dev(hw), buf, len, GFP_KERNEL);
+
+	status = ice_init_pkg(hw, buf_copy, len);
+	if (status) {
+		/* Free the copy, since we failed to initialize the package */
+		devm_kfree(ice_hw_to_dev(hw), buf_copy);
+	} else {
+		/* Track the copied pkg so we can free it later */
+		hw->pkg_copy = buf_copy;
+		hw->pkg_size = len;
+	}
+
+	return status;
+}
+
+/**
+ * ice_pkg_buf_alloc
+ * @hw: pointer to the HW structure
+ *
+ * Allocates a package buffer and returns a pointer to the buffer header.
+ * Note: all package contents must be in Little Endian form.
+ */
+static struct ice_buf_build *ice_pkg_buf_alloc(struct ice_hw *hw)
+{
+	struct ice_buf_build *bld;
+	struct ice_buf_hdr *buf;
+
+	bld = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*bld), GFP_KERNEL);
+	if (!bld)
+		return NULL;
+
+	buf = (struct ice_buf_hdr *)bld;
+	buf->data_end = cpu_to_le16(offsetof(struct ice_buf_hdr,
+					     section_entry));
+	return bld;
+}
+
+/**
+ * ice_pkg_buf_free
+ * @hw: pointer to the HW structure
+ * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc())
+ *
+ * Frees a package buffer
+ */
+static void ice_pkg_buf_free(struct ice_hw *hw, struct ice_buf_build *bld)
+{
+	devm_kfree(ice_hw_to_dev(hw), bld);
+}
+
+/**
+ * ice_pkg_buf_reserve_section
+ * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc())
+ * @count: the number of sections to reserve
+ *
+ * Reserves one or more section table entries in a package buffer. This routine
+ * can be called multiple times as long as they are made before calling
+ * ice_pkg_buf_alloc_section(). Once ice_pkg_buf_alloc_section()
+ * is called once, the number of sections that can be allocated will not be able
+ * to be increased; not using all reserved sections is fine, but this will
+ * result in some wasted space in the buffer.
+ * Note: all package contents must be in Little Endian form.
+ */
+static enum ice_status
+ice_pkg_buf_reserve_section(struct ice_buf_build *bld, u16 count)
+{
+	struct ice_buf_hdr *buf;
+	u16 section_count;
+	u16 data_end;
+
+	if (!bld)
+		return ICE_ERR_PARAM;
+
+	buf = (struct ice_buf_hdr *)&bld->buf;
+
+	/* already an active section, can't increase table size */
+	section_count = le16_to_cpu(buf->section_count);
+	if (section_count > 0)
+		return ICE_ERR_CFG;
+
+	if (bld->reserved_section_table_entries + count > ICE_MAX_S_COUNT)
+		return ICE_ERR_CFG;
+	bld->reserved_section_table_entries += count;
+
+	data_end = le16_to_cpu(buf->data_end) +
+		   (count * sizeof(buf->section_entry[0]));
+	buf->data_end = cpu_to_le16(data_end);
+
+	return 0;
+}
+
+/**
+ * ice_pkg_buf_alloc_section
+ * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc())
+ * @type: the section type value
+ * @size: the size of the section to reserve (in bytes)
+ *
+ * Reserves memory in the buffer for a section's content and updates the
+ * buffers' status accordingly. This routine returns a pointer to the first
+ * byte of the section start within the buffer, which is used to fill in the
+ * section contents.
+ * Note: all package contents must be in Little Endian form.
+ */
+static void *
+ice_pkg_buf_alloc_section(struct ice_buf_build *bld, u32 type, u16 size)
+{
+	struct ice_buf_hdr *buf;
+	u16 sect_count;
+	u16 data_end;
+
+	if (!bld || !type || !size)
+		return NULL;
+
+	buf = (struct ice_buf_hdr *)&bld->buf;
+
+	/* check for enough space left in buffer */
+	data_end = le16_to_cpu(buf->data_end);
+
+	/* section start must align on 4 byte boundary */
+	data_end = ALIGN(data_end, 4);
+
+	if ((data_end + size) > ICE_MAX_S_DATA_END)
+		return NULL;
+
+	/* check for more available section table entries */
+	sect_count = le16_to_cpu(buf->section_count);
+	if (sect_count < bld->reserved_section_table_entries) {
+		void *section_ptr = ((u8 *)buf) + data_end;
+
+		buf->section_entry[sect_count].offset = cpu_to_le16(data_end);
+		buf->section_entry[sect_count].size = cpu_to_le16(size);
+		buf->section_entry[sect_count].type = cpu_to_le32(type);
+
+		data_end += size;
+		buf->data_end = cpu_to_le16(data_end);
+
+		buf->section_count = cpu_to_le16(sect_count + 1);
+		return section_ptr;
+	}
+
+	/* no free section table entries */
+	return NULL;
+}
+
+/**
+ * ice_pkg_buf_get_active_sections
+ * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc())
+ *
+ * Returns the number of active sections. Before using the package buffer
+ * in an update package command, the caller should make sure that there is at
+ * least one active section - otherwise, the buffer is not legal and should
+ * not be used.
+ * Note: all package contents must be in Little Endian form.
+ */
+static u16 ice_pkg_buf_get_active_sections(struct ice_buf_build *bld)
+{
+	struct ice_buf_hdr *buf;
+
+	if (!bld)
+		return 0;
+
+	buf = (struct ice_buf_hdr *)&bld->buf;
+	return le16_to_cpu(buf->section_count);
+}
+
+/**
+ * ice_pkg_buf
+ * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc())
+ *
+ * Return a pointer to the buffer's header
+ */
+static struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld)
+{
+	if (!bld)
+		return NULL;
+
+	return &bld->buf;
+}
+
+/**
+ * ice_get_open_tunnel_port - retrieve an open tunnel port
+ * @hw: pointer to the HW structure
+ * @port: returns open port
+ */
+bool
+ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port)
+{
+	bool res = false;
+	u16 i;
+
+	mutex_lock(&hw->tnl_lock);
+
+	for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++)
+		if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].port) {
+			*port = hw->tnl.tbl[i].port;
+			res = true;
+			break;
+		}
+
+	mutex_unlock(&hw->tnl_lock);
+
+	return res;
+}
+
+/**
+ * ice_tunnel_idx_to_entry - convert linear index to the sparse one
+ * @hw: pointer to the HW structure
+ * @type: type of tunnel
+ * @idx: linear index
+ *
+ * Stack assumes we have 2 linear tables with indexes [0, count_valid),
+ * but really the port table may be sprase, and types are mixed, so convert
+ * the stack index into the device index.
+ */
+static u16 ice_tunnel_idx_to_entry(struct ice_hw *hw, enum ice_tunnel_type type,
+				   u16 idx)
+{
+	u16 i;
+
+	for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++)
+		if (hw->tnl.tbl[i].valid &&
+		    hw->tnl.tbl[i].type == type &&
+		    idx-- == 0)
+			return i;
+
+	WARN_ON_ONCE(1);
+	return 0;
+}
+
+/**
+ * ice_create_tunnel
+ * @hw: pointer to the HW structure
+ * @index: device table entry
+ * @type: type of tunnel
+ * @port: port of tunnel to create
+ *
+ * Create a tunnel by updating the parse graph in the parser. We do that by
+ * creating a package buffer with the tunnel info and issuing an update package
+ * command.
+ */
+static enum ice_status
+ice_create_tunnel(struct ice_hw *hw, u16 index,
+		  enum ice_tunnel_type type, u16 port)
+{
+	struct ice_boost_tcam_section *sect_rx, *sect_tx;
+	enum ice_status status = ICE_ERR_MAX_LIMIT;
+	struct ice_buf_build *bld;
+
+	mutex_lock(&hw->tnl_lock);
+
+	bld = ice_pkg_buf_alloc(hw);
+	if (!bld) {
+		status = ICE_ERR_NO_MEMORY;
+		goto ice_create_tunnel_end;
+	}
+
+	/* allocate 2 sections, one for Rx parser, one for Tx parser */
+	if (ice_pkg_buf_reserve_section(bld, 2))
+		goto ice_create_tunnel_err;
+
+	sect_rx = ice_pkg_buf_alloc_section(bld, ICE_SID_RXPARSER_BOOST_TCAM,
+					    struct_size(sect_rx, tcam, 1));
+	if (!sect_rx)
+		goto ice_create_tunnel_err;
+	sect_rx->count = cpu_to_le16(1);
+
+	sect_tx = ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM,
+					    struct_size(sect_tx, tcam, 1));
+	if (!sect_tx)
+		goto ice_create_tunnel_err;
+	sect_tx->count = cpu_to_le16(1);
+
+	/* copy original boost entry to update package buffer */
+	memcpy(sect_rx->tcam, hw->tnl.tbl[index].boost_entry,
+	       sizeof(*sect_rx->tcam));
+
+	/* over-write the never-match dest port key bits with the encoded port
+	 * bits
+	 */
+	ice_set_key((u8 *)&sect_rx->tcam[0].key, sizeof(sect_rx->tcam[0].key),
+		    (u8 *)&port, NULL, NULL, NULL,
+		    (u16)offsetof(struct ice_boost_key_value, hv_dst_port_key),
+		    sizeof(sect_rx->tcam[0].key.key.hv_dst_port_key));
+
+	/* exact copy of entry to Tx section entry */
+	memcpy(sect_tx->tcam, sect_rx->tcam, sizeof(*sect_tx->tcam));
+
+	status = ice_update_pkg(hw, ice_pkg_buf(bld), 1);
+	if (!status)
+		hw->tnl.tbl[index].port = port;
+
+ice_create_tunnel_err:
+	ice_pkg_buf_free(hw, bld);
+
+ice_create_tunnel_end:
+	mutex_unlock(&hw->tnl_lock);
+
+	return status;
+}
+
+/**
+ * ice_destroy_tunnel
+ * @hw: pointer to the HW structure
+ * @index: device table entry
+ * @type: type of tunnel
+ * @port: port of tunnel to destroy (ignored if the all parameter is true)
+ *
+ * Destroys a tunnel or all tunnels by creating an update package buffer
+ * targeting the specific updates requested and then performing an update
+ * package.
+ */
+static enum ice_status
+ice_destroy_tunnel(struct ice_hw *hw, u16 index, enum ice_tunnel_type type,
+		   u16 port)
+{
+	struct ice_boost_tcam_section *sect_rx, *sect_tx;
+	enum ice_status status = ICE_ERR_MAX_LIMIT;
+	struct ice_buf_build *bld;
+
+	mutex_lock(&hw->tnl_lock);
+
+	if (WARN_ON(!hw->tnl.tbl[index].valid ||
+		    hw->tnl.tbl[index].type != type ||
+		    hw->tnl.tbl[index].port != port)) {
+		status = ICE_ERR_OUT_OF_RANGE;
+		goto ice_destroy_tunnel_end;
+	}
+
+	bld = ice_pkg_buf_alloc(hw);
+	if (!bld) {
+		status = ICE_ERR_NO_MEMORY;
+		goto ice_destroy_tunnel_end;
+	}
+
+	/* allocate 2 sections, one for Rx parser, one for Tx parser */
+	if (ice_pkg_buf_reserve_section(bld, 2))
+		goto ice_destroy_tunnel_err;
+
+	sect_rx = ice_pkg_buf_alloc_section(bld, ICE_SID_RXPARSER_BOOST_TCAM,
+					    struct_size(sect_rx, tcam, 1));
+	if (!sect_rx)
+		goto ice_destroy_tunnel_err;
+	sect_rx->count = cpu_to_le16(1);
+
+	sect_tx = ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM,
+					    struct_size(sect_tx, tcam, 1));
+	if (!sect_tx)
+		goto ice_destroy_tunnel_err;
+	sect_tx->count = cpu_to_le16(1);
+
+	/* copy original boost entry to update package buffer, one copy to Rx
+	 * section, another copy to the Tx section
+	 */
+	memcpy(sect_rx->tcam, hw->tnl.tbl[index].boost_entry,
+	       sizeof(*sect_rx->tcam));
+	memcpy(sect_tx->tcam, hw->tnl.tbl[index].boost_entry,
+	       sizeof(*sect_tx->tcam));
+
+	status = ice_update_pkg(hw, ice_pkg_buf(bld), 1);
+	if (!status)
+		hw->tnl.tbl[index].port = 0;
+
+ice_destroy_tunnel_err:
+	ice_pkg_buf_free(hw, bld);
+
+ice_destroy_tunnel_end:
+	mutex_unlock(&hw->tnl_lock);
+
+	return status;
+}
+
+int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table,
+			    unsigned int idx, struct udp_tunnel_info *ti)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	enum ice_tunnel_type tnl_type;
+	enum ice_status status;
+	u16 index;
+
+	tnl_type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? TNL_VXLAN : TNL_GENEVE;
+	index = ice_tunnel_idx_to_entry(&pf->hw, tnl_type, idx);
+
+	status = ice_create_tunnel(&pf->hw, index, tnl_type, ntohs(ti->port));
+	if (status) {
+		netdev_err(netdev, "Error adding UDP tunnel - %s\n",
+			   ice_stat_str(status));
+		return -EIO;
+	}
+
+	udp_tunnel_nic_set_port_priv(netdev, table, idx, index);
+	return 0;
+}
+
+int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table,
+			      unsigned int idx, struct udp_tunnel_info *ti)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	enum ice_tunnel_type tnl_type;
+	enum ice_status status;
+
+	tnl_type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? TNL_VXLAN : TNL_GENEVE;
+
+	status = ice_destroy_tunnel(&pf->hw, ti->hw_priv, tnl_type,
+				    ntohs(ti->port));
+	if (status) {
+		netdev_err(netdev, "Error removing UDP tunnel - %s\n",
+			   ice_stat_str(status));
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/* PTG Management */
+
+/**
+ * ice_ptg_find_ptype - Search for packet type group using packet type (ptype)
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @ptype: the ptype to search for
+ * @ptg: pointer to variable that receives the PTG
+ *
+ * This function will search the PTGs for a particular ptype, returning the
+ * PTG ID that contains it through the PTG parameter, with the value of
+ * ICE_DEFAULT_PTG (0) meaning it is part the default PTG.
+ */
+static enum ice_status
+ice_ptg_find_ptype(struct ice_hw *hw, enum ice_block blk, u16 ptype, u8 *ptg)
+{
+	if (ptype >= ICE_XLT1_CNT || !ptg)
+		return ICE_ERR_PARAM;
+
+	*ptg = hw->blk[blk].xlt1.ptypes[ptype].ptg;
+	return 0;
+}
+
+/**
+ * ice_ptg_alloc_val - Allocates a new packet type group ID by value
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @ptg: the PTG to allocate
+ *
+ * This function allocates a given packet type group ID specified by the PTG
+ * parameter.
+ */
+static void ice_ptg_alloc_val(struct ice_hw *hw, enum ice_block blk, u8 ptg)
+{
+	hw->blk[blk].xlt1.ptg_tbl[ptg].in_use = true;
+}
+
+/**
+ * ice_ptg_remove_ptype - Removes ptype from a particular packet type group
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @ptype: the ptype to remove
+ * @ptg: the PTG to remove the ptype from
+ *
+ * This function will remove the ptype from the specific PTG, and move it to
+ * the default PTG (ICE_DEFAULT_PTG).
+ */
+static enum ice_status
+ice_ptg_remove_ptype(struct ice_hw *hw, enum ice_block blk, u16 ptype, u8 ptg)
+{
+	struct ice_ptg_ptype **ch;
+	struct ice_ptg_ptype *p;
+
+	if (ptype > ICE_XLT1_CNT - 1)
+		return ICE_ERR_PARAM;
+
+	if (!hw->blk[blk].xlt1.ptg_tbl[ptg].in_use)
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	/* Should not happen if .in_use is set, bad config */
+	if (!hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype)
+		return ICE_ERR_CFG;
+
+	/* find the ptype within this PTG, and bypass the link over it */
+	p = hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype;
+	ch = &hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype;
+	while (p) {
+		if (ptype == (p - hw->blk[blk].xlt1.ptypes)) {
+			*ch = p->next_ptype;
+			break;
+		}
+
+		ch = &p->next_ptype;
+		p = p->next_ptype;
+	}
+
+	hw->blk[blk].xlt1.ptypes[ptype].ptg = ICE_DEFAULT_PTG;
+	hw->blk[blk].xlt1.ptypes[ptype].next_ptype = NULL;
+
+	return 0;
+}
+
+/**
+ * ice_ptg_add_mv_ptype - Adds/moves ptype to a particular packet type group
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @ptype: the ptype to add or move
+ * @ptg: the PTG to add or move the ptype to
+ *
+ * This function will either add or move a ptype to a particular PTG depending
+ * on if the ptype is already part of another group. Note that using a
+ * a destination PTG ID of ICE_DEFAULT_PTG (0) will move the ptype to the
+ * default PTG.
+ */
+static enum ice_status
+ice_ptg_add_mv_ptype(struct ice_hw *hw, enum ice_block blk, u16 ptype, u8 ptg)
+{
+	enum ice_status status;
+	u8 original_ptg;
+
+	if (ptype > ICE_XLT1_CNT - 1)
+		return ICE_ERR_PARAM;
+
+	if (!hw->blk[blk].xlt1.ptg_tbl[ptg].in_use && ptg != ICE_DEFAULT_PTG)
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	status = ice_ptg_find_ptype(hw, blk, ptype, &original_ptg);
+	if (status)
+		return status;
+
+	/* Is ptype already in the correct PTG? */
+	if (original_ptg == ptg)
+		return 0;
+
+	/* Remove from original PTG and move back to the default PTG */
+	if (original_ptg != ICE_DEFAULT_PTG)
+		ice_ptg_remove_ptype(hw, blk, ptype, original_ptg);
+
+	/* Moving to default PTG? Then we're done with this request */
+	if (ptg == ICE_DEFAULT_PTG)
+		return 0;
+
+	/* Add ptype to PTG at beginning of list */
+	hw->blk[blk].xlt1.ptypes[ptype].next_ptype =
+		hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype;
+	hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype =
+		&hw->blk[blk].xlt1.ptypes[ptype];
+
+	hw->blk[blk].xlt1.ptypes[ptype].ptg = ptg;
+	hw->blk[blk].xlt1.t[ptype] = ptg;
+
+	return 0;
+}
+
+/* Block / table size info */
+struct ice_blk_size_details {
+	u16 xlt1;			/* # XLT1 entries */
+	u16 xlt2;			/* # XLT2 entries */
+	u16 prof_tcam;			/* # profile ID TCAM entries */
+	u16 prof_id;			/* # profile IDs */
+	u8 prof_cdid_bits;		/* # CDID one-hot bits used in key */
+	u16 prof_redir;			/* # profile redirection entries */
+	u16 es;				/* # extraction sequence entries */
+	u16 fvw;			/* # field vector words */
+	u8 overwrite;			/* overwrite existing entries allowed */
+	u8 reverse;			/* reverse FV order */
+};
+
+static const struct ice_blk_size_details blk_sizes[ICE_BLK_COUNT] = {
+	/**
+	 * Table Definitions
+	 * XLT1 - Number of entries in XLT1 table
+	 * XLT2 - Number of entries in XLT2 table
+	 * TCAM - Number of entries Profile ID TCAM table
+	 * CDID - Control Domain ID of the hardware block
+	 * PRED - Number of entries in the Profile Redirection Table
+	 * FV   - Number of entries in the Field Vector
+	 * FVW  - Width (in WORDs) of the Field Vector
+	 * OVR  - Overwrite existing table entries
+	 * REV  - Reverse FV
+	 */
+	/*          XLT1        , XLT2        ,TCAM, PID,CDID,PRED,   FV, FVW */
+	/*          Overwrite   , Reverse FV */
+	/* SW  */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 256,   0,  256, 256,  48,
+		    false, false },
+	/* ACL */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 128,   0,  128, 128,  32,
+		    false, false },
+	/* FD  */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 128,   0,  128, 128,  24,
+		    false, true  },
+	/* RSS */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 128,   0,  128, 128,  24,
+		    true,  true  },
+	/* PE  */ { ICE_XLT1_CNT, ICE_XLT2_CNT,  64,  32,   0,   32,  32,  24,
+		    false, false },
+};
+
+enum ice_sid_all {
+	ICE_SID_XLT1_OFF = 0,
+	ICE_SID_XLT2_OFF,
+	ICE_SID_PR_OFF,
+	ICE_SID_PR_REDIR_OFF,
+	ICE_SID_ES_OFF,
+	ICE_SID_OFF_COUNT,
+};
+
+/* Characteristic handling */
+
+/**
+ * ice_match_prop_lst - determine if properties of two lists match
+ * @list1: first properties list
+ * @list2: second properties list
+ *
+ * Count, cookies and the order must match in order to be considered equivalent.
+ */
+static bool
+ice_match_prop_lst(struct list_head *list1, struct list_head *list2)
+{
+	struct ice_vsig_prof *tmp1;
+	struct ice_vsig_prof *tmp2;
+	u16 chk_count = 0;
+	u16 count = 0;
+
+	/* compare counts */
+	list_for_each_entry(tmp1, list1, list)
+		count++;
+	list_for_each_entry(tmp2, list2, list)
+		chk_count++;
+	if (!count || count != chk_count)
+		return false;
+
+	tmp1 = list_first_entry(list1, struct ice_vsig_prof, list);
+	tmp2 = list_first_entry(list2, struct ice_vsig_prof, list);
+
+	/* profile cookies must compare, and in the exact same order to take
+	 * into account priority
+	 */
+	while (count--) {
+		if (tmp2->profile_cookie != tmp1->profile_cookie)
+			return false;
+
+		tmp1 = list_next_entry(tmp1, list);
+		tmp2 = list_next_entry(tmp2, list);
+	}
+
+	return true;
+}
+
+/* VSIG Management */
+
+/**
+ * ice_vsig_find_vsi - find a VSIG that contains a specified VSI
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsi: VSI of interest
+ * @vsig: pointer to receive the VSI group
+ *
+ * This function will lookup the VSI entry in the XLT2 list and return
+ * the VSI group its associated with.
+ */
+static enum ice_status
+ice_vsig_find_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 *vsig)
+{
+	if (!vsig || vsi >= ICE_MAX_VSI)
+		return ICE_ERR_PARAM;
+
+	/* As long as there's a default or valid VSIG associated with the input
+	 * VSI, the functions returns a success. Any handling of VSIG will be
+	 * done by the following add, update or remove functions.
+	 */
+	*vsig = hw->blk[blk].xlt2.vsis[vsi].vsig;
+
+	return 0;
+}
+
+/**
+ * ice_vsig_alloc_val - allocate a new VSIG by value
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsig: the VSIG to allocate
+ *
+ * This function will allocate a given VSIG specified by the VSIG parameter.
+ */
+static u16 ice_vsig_alloc_val(struct ice_hw *hw, enum ice_block blk, u16 vsig)
+{
+	u16 idx = vsig & ICE_VSIG_IDX_M;
+
+	if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use) {
+		INIT_LIST_HEAD(&hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst);
+		hw->blk[blk].xlt2.vsig_tbl[idx].in_use = true;
+	}
+
+	return ICE_VSIG_VALUE(idx, hw->pf_id);
+}
+
+/**
+ * ice_vsig_alloc - Finds a free entry and allocates a new VSIG
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ *
+ * This function will iterate through the VSIG list and mark the first
+ * unused entry for the new VSIG entry as used and return that value.
+ */
+static u16 ice_vsig_alloc(struct ice_hw *hw, enum ice_block blk)
+{
+	u16 i;
+
+	for (i = 1; i < ICE_MAX_VSIGS; i++)
+		if (!hw->blk[blk].xlt2.vsig_tbl[i].in_use)
+			return ice_vsig_alloc_val(hw, blk, i);
+
+	return ICE_DEFAULT_VSIG;
+}
+
+/**
+ * ice_find_dup_props_vsig - find VSI group with a specified set of properties
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @chs: characteristic list
+ * @vsig: returns the VSIG with the matching profiles, if found
+ *
+ * Each VSIG is associated with a characteristic set; i.e. all VSIs under
+ * a group have the same characteristic set. To check if there exists a VSIG
+ * which has the same characteristics as the input characteristics; this
+ * function will iterate through the XLT2 list and return the VSIG that has a
+ * matching configuration. In order to make sure that priorities are accounted
+ * for, the list must match exactly, including the order in which the
+ * characteristics are listed.
+ */
+static enum ice_status
+ice_find_dup_props_vsig(struct ice_hw *hw, enum ice_block blk,
+			struct list_head *chs, u16 *vsig)
+{
+	struct ice_xlt2 *xlt2 = &hw->blk[blk].xlt2;
+	u16 i;
+
+	for (i = 0; i < xlt2->count; i++)
+		if (xlt2->vsig_tbl[i].in_use &&
+		    ice_match_prop_lst(chs, &xlt2->vsig_tbl[i].prop_lst)) {
+			*vsig = ICE_VSIG_VALUE(i, hw->pf_id);
+			return 0;
+		}
+
+	return ICE_ERR_DOES_NOT_EXIST;
+}
+
+/**
+ * ice_vsig_free - free VSI group
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsig: VSIG to remove
+ *
+ * The function will remove all VSIs associated with the input VSIG and move
+ * them to the DEFAULT_VSIG and mark the VSIG available.
+ */
+static enum ice_status
+ice_vsig_free(struct ice_hw *hw, enum ice_block blk, u16 vsig)
+{
+	struct ice_vsig_prof *dtmp, *del;
+	struct ice_vsig_vsi *vsi_cur;
+	u16 idx;
+
+	idx = vsig & ICE_VSIG_IDX_M;
+	if (idx >= ICE_MAX_VSIGS)
+		return ICE_ERR_PARAM;
+
+	if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use)
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	hw->blk[blk].xlt2.vsig_tbl[idx].in_use = false;
+
+	vsi_cur = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi;
+	/* If the VSIG has at least 1 VSI then iterate through the
+	 * list and remove the VSIs before deleting the group.
+	 */
+	if (vsi_cur) {
+		/* remove all vsis associated with this VSIG XLT2 entry */
+		do {
+			struct ice_vsig_vsi *tmp = vsi_cur->next_vsi;
+
+			vsi_cur->vsig = ICE_DEFAULT_VSIG;
+			vsi_cur->changed = 1;
+			vsi_cur->next_vsi = NULL;
+			vsi_cur = tmp;
+		} while (vsi_cur);
+
+		/* NULL terminate head of VSI list */
+		hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi = NULL;
+	}
+
+	/* free characteristic list */
+	list_for_each_entry_safe(del, dtmp,
+				 &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+				 list) {
+		list_del(&del->list);
+		devm_kfree(ice_hw_to_dev(hw), del);
+	}
+
+	/* if VSIG characteristic list was cleared for reset
+	 * re-initialize the list head
+	 */
+	INIT_LIST_HEAD(&hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst);
+
+	return 0;
+}
+
+/**
+ * ice_vsig_remove_vsi - remove VSI from VSIG
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsi: VSI to remove
+ * @vsig: VSI group to remove from
+ *
+ * The function will remove the input VSI from its VSI group and move it
+ * to the DEFAULT_VSIG.
+ */
+static enum ice_status
+ice_vsig_remove_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig)
+{
+	struct ice_vsig_vsi **vsi_head, *vsi_cur, *vsi_tgt;
+	u16 idx;
+
+	idx = vsig & ICE_VSIG_IDX_M;
+
+	if (vsi >= ICE_MAX_VSI || idx >= ICE_MAX_VSIGS)
+		return ICE_ERR_PARAM;
+
+	if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use)
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	/* entry already in default VSIG, don't have to remove */
+	if (idx == ICE_DEFAULT_VSIG)
+		return 0;
+
+	vsi_head = &hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi;
+	if (!(*vsi_head))
+		return ICE_ERR_CFG;
+
+	vsi_tgt = &hw->blk[blk].xlt2.vsis[vsi];
+	vsi_cur = (*vsi_head);
+
+	/* iterate the VSI list, skip over the entry to be removed */
+	while (vsi_cur) {
+		if (vsi_tgt == vsi_cur) {
+			(*vsi_head) = vsi_cur->next_vsi;
+			break;
+		}
+		vsi_head = &vsi_cur->next_vsi;
+		vsi_cur = vsi_cur->next_vsi;
+	}
+
+	/* verify if VSI was removed from group list */
+	if (!vsi_cur)
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	vsi_cur->vsig = ICE_DEFAULT_VSIG;
+	vsi_cur->changed = 1;
+	vsi_cur->next_vsi = NULL;
+
+	return 0;
+}
+
+/**
+ * ice_vsig_add_mv_vsi - add or move a VSI to a VSI group
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsi: VSI to move
+ * @vsig: destination VSI group
+ *
+ * This function will move or add the input VSI to the target VSIG.
+ * The function will find the original VSIG the VSI belongs to and
+ * move the entry to the DEFAULT_VSIG, update the original VSIG and
+ * then move entry to the new VSIG.
+ */
+static enum ice_status
+ice_vsig_add_mv_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig)
+{
+	struct ice_vsig_vsi *tmp;
+	enum ice_status status;
+	u16 orig_vsig, idx;
+
+	idx = vsig & ICE_VSIG_IDX_M;
+
+	if (vsi >= ICE_MAX_VSI || idx >= ICE_MAX_VSIGS)
+		return ICE_ERR_PARAM;
+
+	/* if VSIG not in use and VSIG is not default type this VSIG
+	 * doesn't exist.
+	 */
+	if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use &&
+	    vsig != ICE_DEFAULT_VSIG)
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	status = ice_vsig_find_vsi(hw, blk, vsi, &orig_vsig);
+	if (status)
+		return status;
+
+	/* no update required if vsigs match */
+	if (orig_vsig == vsig)
+		return 0;
+
+	if (orig_vsig != ICE_DEFAULT_VSIG) {
+		/* remove entry from orig_vsig and add to default VSIG */
+		status = ice_vsig_remove_vsi(hw, blk, vsi, orig_vsig);
+		if (status)
+			return status;
+	}
+
+	if (idx == ICE_DEFAULT_VSIG)
+		return 0;
+
+	/* Create VSI entry and add VSIG and prop_mask values */
+	hw->blk[blk].xlt2.vsis[vsi].vsig = vsig;
+	hw->blk[blk].xlt2.vsis[vsi].changed = 1;
+
+	/* Add new entry to the head of the VSIG list */
+	tmp = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi;
+	hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi =
+		&hw->blk[blk].xlt2.vsis[vsi];
+	hw->blk[blk].xlt2.vsis[vsi].next_vsi = tmp;
+	hw->blk[blk].xlt2.t[vsi] = vsig;
+
+	return 0;
+}
+
+/**
+ * ice_find_prof_id - find profile ID for a given field vector
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @fv: field vector to search for
+ * @prof_id: receives the profile ID
+ */
+static enum ice_status
+ice_find_prof_id(struct ice_hw *hw, enum ice_block blk,
+		 struct ice_fv_word *fv, u8 *prof_id)
+{
+	struct ice_es *es = &hw->blk[blk].es;
+	u16 off;
+	u8 i;
+
+	/* For FD, we don't want to re-use a existed profile with the same
+	 * field vector and mask. This will cause rule interference.
+	 */
+	if (blk == ICE_BLK_FD)
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	for (i = 0; i < (u8)es->count; i++) {
+		off = i * es->fvw;
+
+		if (memcmp(&es->t[off], fv, es->fvw * sizeof(*fv)))
+			continue;
+
+		*prof_id = i;
+		return 0;
+	}
+
+	return ICE_ERR_DOES_NOT_EXIST;
+}
+
+/**
+ * ice_prof_id_rsrc_type - get profile ID resource type for a block type
+ * @blk: the block type
+ * @rsrc_type: pointer to variable to receive the resource type
+ */
+static bool ice_prof_id_rsrc_type(enum ice_block blk, u16 *rsrc_type)
+{
+	switch (blk) {
+	case ICE_BLK_FD:
+		*rsrc_type = ICE_AQC_RES_TYPE_FD_PROF_BLDR_PROFID;
+		break;
+	case ICE_BLK_RSS:
+		*rsrc_type = ICE_AQC_RES_TYPE_HASH_PROF_BLDR_PROFID;
+		break;
+	default:
+		return false;
+	}
+	return true;
+}
+
+/**
+ * ice_tcam_ent_rsrc_type - get TCAM entry resource type for a block type
+ * @blk: the block type
+ * @rsrc_type: pointer to variable to receive the resource type
+ */
+static bool ice_tcam_ent_rsrc_type(enum ice_block blk, u16 *rsrc_type)
+{
+	switch (blk) {
+	case ICE_BLK_FD:
+		*rsrc_type = ICE_AQC_RES_TYPE_FD_PROF_BLDR_TCAM;
+		break;
+	case ICE_BLK_RSS:
+		*rsrc_type = ICE_AQC_RES_TYPE_HASH_PROF_BLDR_TCAM;
+		break;
+	default:
+		return false;
+	}
+	return true;
+}
+
+/**
+ * ice_alloc_tcam_ent - allocate hardware TCAM entry
+ * @hw: pointer to the HW struct
+ * @blk: the block to allocate the TCAM for
+ * @tcam_idx: pointer to variable to receive the TCAM entry
+ *
+ * This function allocates a new entry in a Profile ID TCAM for a specific
+ * block.
+ */
+static enum ice_status
+ice_alloc_tcam_ent(struct ice_hw *hw, enum ice_block blk, u16 *tcam_idx)
+{
+	u16 res_type;
+
+	if (!ice_tcam_ent_rsrc_type(blk, &res_type))
+		return ICE_ERR_PARAM;
+
+	return ice_alloc_hw_res(hw, res_type, 1, true, tcam_idx);
+}
+
+/**
+ * ice_free_tcam_ent - free hardware TCAM entry
+ * @hw: pointer to the HW struct
+ * @blk: the block from which to free the TCAM entry
+ * @tcam_idx: the TCAM entry to free
+ *
+ * This function frees an entry in a Profile ID TCAM for a specific block.
+ */
+static enum ice_status
+ice_free_tcam_ent(struct ice_hw *hw, enum ice_block blk, u16 tcam_idx)
+{
+	u16 res_type;
+
+	if (!ice_tcam_ent_rsrc_type(blk, &res_type))
+		return ICE_ERR_PARAM;
+
+	return ice_free_hw_res(hw, res_type, 1, &tcam_idx);
+}
+
+/**
+ * ice_alloc_prof_id - allocate profile ID
+ * @hw: pointer to the HW struct
+ * @blk: the block to allocate the profile ID for
+ * @prof_id: pointer to variable to receive the profile ID
+ *
+ * This function allocates a new profile ID, which also corresponds to a Field
+ * Vector (Extraction Sequence) entry.
+ */
+static enum ice_status
+ice_alloc_prof_id(struct ice_hw *hw, enum ice_block blk, u8 *prof_id)
+{
+	enum ice_status status;
+	u16 res_type;
+	u16 get_prof;
+
+	if (!ice_prof_id_rsrc_type(blk, &res_type))
+		return ICE_ERR_PARAM;
+
+	status = ice_alloc_hw_res(hw, res_type, 1, false, &get_prof);
+	if (!status)
+		*prof_id = (u8)get_prof;
+
+	return status;
+}
+
+/**
+ * ice_free_prof_id - free profile ID
+ * @hw: pointer to the HW struct
+ * @blk: the block from which to free the profile ID
+ * @prof_id: the profile ID to free
+ *
+ * This function frees a profile ID, which also corresponds to a Field Vector.
+ */
+static enum ice_status
+ice_free_prof_id(struct ice_hw *hw, enum ice_block blk, u8 prof_id)
+{
+	u16 tmp_prof_id = (u16)prof_id;
+	u16 res_type;
+
+	if (!ice_prof_id_rsrc_type(blk, &res_type))
+		return ICE_ERR_PARAM;
+
+	return ice_free_hw_res(hw, res_type, 1, &tmp_prof_id);
+}
+
+/**
+ * ice_prof_inc_ref - increment reference count for profile
+ * @hw: pointer to the HW struct
+ * @blk: the block from which to free the profile ID
+ * @prof_id: the profile ID for which to increment the reference count
+ */
+static enum ice_status
+ice_prof_inc_ref(struct ice_hw *hw, enum ice_block blk, u8 prof_id)
+{
+	if (prof_id > hw->blk[blk].es.count)
+		return ICE_ERR_PARAM;
+
+	hw->blk[blk].es.ref_count[prof_id]++;
+
+	return 0;
+}
+
+/**
+ * ice_write_es - write an extraction sequence to hardware
+ * @hw: pointer to the HW struct
+ * @blk: the block in which to write the extraction sequence
+ * @prof_id: the profile ID to write
+ * @fv: pointer to the extraction sequence to write - NULL to clear extraction
+ */
+static void
+ice_write_es(struct ice_hw *hw, enum ice_block blk, u8 prof_id,
+	     struct ice_fv_word *fv)
+{
+	u16 off;
+
+	off = prof_id * hw->blk[blk].es.fvw;
+	if (!fv) {
+		memset(&hw->blk[blk].es.t[off], 0,
+		       hw->blk[blk].es.fvw * sizeof(*fv));
+		hw->blk[blk].es.written[prof_id] = false;
+	} else {
+		memcpy(&hw->blk[blk].es.t[off], fv,
+		       hw->blk[blk].es.fvw * sizeof(*fv));
+	}
+}
+
+/**
+ * ice_prof_dec_ref - decrement reference count for profile
+ * @hw: pointer to the HW struct
+ * @blk: the block from which to free the profile ID
+ * @prof_id: the profile ID for which to decrement the reference count
+ */
+static enum ice_status
+ice_prof_dec_ref(struct ice_hw *hw, enum ice_block blk, u8 prof_id)
+{
+	if (prof_id > hw->blk[blk].es.count)
+		return ICE_ERR_PARAM;
+
+	if (hw->blk[blk].es.ref_count[prof_id] > 0) {
+		if (!--hw->blk[blk].es.ref_count[prof_id]) {
+			ice_write_es(hw, blk, prof_id, NULL);
+			return ice_free_prof_id(hw, blk, prof_id);
+		}
+	}
+
+	return 0;
+}
+
+/* Block / table section IDs */
+static const u32 ice_blk_sids[ICE_BLK_COUNT][ICE_SID_OFF_COUNT] = {
+	/* SWITCH */
+	{	ICE_SID_XLT1_SW,
+		ICE_SID_XLT2_SW,
+		ICE_SID_PROFID_TCAM_SW,
+		ICE_SID_PROFID_REDIR_SW,
+		ICE_SID_FLD_VEC_SW
+	},
+
+	/* ACL */
+	{	ICE_SID_XLT1_ACL,
+		ICE_SID_XLT2_ACL,
+		ICE_SID_PROFID_TCAM_ACL,
+		ICE_SID_PROFID_REDIR_ACL,
+		ICE_SID_FLD_VEC_ACL
+	},
+
+	/* FD */
+	{	ICE_SID_XLT1_FD,
+		ICE_SID_XLT2_FD,
+		ICE_SID_PROFID_TCAM_FD,
+		ICE_SID_PROFID_REDIR_FD,
+		ICE_SID_FLD_VEC_FD
+	},
+
+	/* RSS */
+	{	ICE_SID_XLT1_RSS,
+		ICE_SID_XLT2_RSS,
+		ICE_SID_PROFID_TCAM_RSS,
+		ICE_SID_PROFID_REDIR_RSS,
+		ICE_SID_FLD_VEC_RSS
+	},
+
+	/* PE */
+	{	ICE_SID_XLT1_PE,
+		ICE_SID_XLT2_PE,
+		ICE_SID_PROFID_TCAM_PE,
+		ICE_SID_PROFID_REDIR_PE,
+		ICE_SID_FLD_VEC_PE
+	}
+};
+
+/**
+ * ice_init_sw_xlt1_db - init software XLT1 database from HW tables
+ * @hw: pointer to the hardware structure
+ * @blk: the HW block to initialize
+ */
+static void ice_init_sw_xlt1_db(struct ice_hw *hw, enum ice_block blk)
+{
+	u16 pt;
+
+	for (pt = 0; pt < hw->blk[blk].xlt1.count; pt++) {
+		u8 ptg;
+
+		ptg = hw->blk[blk].xlt1.t[pt];
+		if (ptg != ICE_DEFAULT_PTG) {
+			ice_ptg_alloc_val(hw, blk, ptg);
+			ice_ptg_add_mv_ptype(hw, blk, pt, ptg);
+		}
+	}
+}
+
+/**
+ * ice_init_sw_xlt2_db - init software XLT2 database from HW tables
+ * @hw: pointer to the hardware structure
+ * @blk: the HW block to initialize
+ */
+static void ice_init_sw_xlt2_db(struct ice_hw *hw, enum ice_block blk)
+{
+	u16 vsi;
+
+	for (vsi = 0; vsi < hw->blk[blk].xlt2.count; vsi++) {
+		u16 vsig;
+
+		vsig = hw->blk[blk].xlt2.t[vsi];
+		if (vsig) {
+			ice_vsig_alloc_val(hw, blk, vsig);
+			ice_vsig_add_mv_vsi(hw, blk, vsi, vsig);
+			/* no changes at this time, since this has been
+			 * initialized from the original package
+			 */
+			hw->blk[blk].xlt2.vsis[vsi].changed = 0;
+		}
+	}
+}
+
+/**
+ * ice_init_sw_db - init software database from HW tables
+ * @hw: pointer to the hardware structure
+ */
+static void ice_init_sw_db(struct ice_hw *hw)
+{
+	u16 i;
+
+	for (i = 0; i < ICE_BLK_COUNT; i++) {
+		ice_init_sw_xlt1_db(hw, (enum ice_block)i);
+		ice_init_sw_xlt2_db(hw, (enum ice_block)i);
+	}
+}
+
+/**
+ * ice_fill_tbl - Reads content of a single table type into database
+ * @hw: pointer to the hardware structure
+ * @block_id: Block ID of the table to copy
+ * @sid: Section ID of the table to copy
+ *
+ * Will attempt to read the entire content of a given table of a single block
+ * into the driver database. We assume that the buffer will always
+ * be as large or larger than the data contained in the package. If
+ * this condition is not met, there is most likely an error in the package
+ * contents.
+ */
+static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid)
+{
+	u32 dst_len, sect_len, offset = 0;
+	struct ice_prof_redir_section *pr;
+	struct ice_prof_id_section *pid;
+	struct ice_xlt1_section *xlt1;
+	struct ice_xlt2_section *xlt2;
+	struct ice_sw_fv_section *es;
+	struct ice_pkg_enum state;
+	u8 *src, *dst;
+	void *sect;
+
+	/* if the HW segment pointer is null then the first iteration of
+	 * ice_pkg_enum_section() will fail. In this case the HW tables will
+	 * not be filled and return success.
+	 */
+	if (!hw->seg) {
+		ice_debug(hw, ICE_DBG_PKG, "hw->seg is NULL, tables are not filled\n");
+		return;
+	}
+
+	memset(&state, 0, sizeof(state));
+
+	sect = ice_pkg_enum_section(hw->seg, &state, sid);
+
+	while (sect) {
+		switch (sid) {
+		case ICE_SID_XLT1_SW:
+		case ICE_SID_XLT1_FD:
+		case ICE_SID_XLT1_RSS:
+		case ICE_SID_XLT1_ACL:
+		case ICE_SID_XLT1_PE:
+			xlt1 = (struct ice_xlt1_section *)sect;
+			src = xlt1->value;
+			sect_len = le16_to_cpu(xlt1->count) *
+				sizeof(*hw->blk[block_id].xlt1.t);
+			dst = hw->blk[block_id].xlt1.t;
+			dst_len = hw->blk[block_id].xlt1.count *
+				sizeof(*hw->blk[block_id].xlt1.t);
+			break;
+		case ICE_SID_XLT2_SW:
+		case ICE_SID_XLT2_FD:
+		case ICE_SID_XLT2_RSS:
+		case ICE_SID_XLT2_ACL:
+		case ICE_SID_XLT2_PE:
+			xlt2 = (struct ice_xlt2_section *)sect;
+			src = (__force u8 *)xlt2->value;
+			sect_len = le16_to_cpu(xlt2->count) *
+				sizeof(*hw->blk[block_id].xlt2.t);
+			dst = (u8 *)hw->blk[block_id].xlt2.t;
+			dst_len = hw->blk[block_id].xlt2.count *
+				sizeof(*hw->blk[block_id].xlt2.t);
+			break;
+		case ICE_SID_PROFID_TCAM_SW:
+		case ICE_SID_PROFID_TCAM_FD:
+		case ICE_SID_PROFID_TCAM_RSS:
+		case ICE_SID_PROFID_TCAM_ACL:
+		case ICE_SID_PROFID_TCAM_PE:
+			pid = (struct ice_prof_id_section *)sect;
+			src = (u8 *)pid->entry;
+			sect_len = le16_to_cpu(pid->count) *
+				sizeof(*hw->blk[block_id].prof.t);
+			dst = (u8 *)hw->blk[block_id].prof.t;
+			dst_len = hw->blk[block_id].prof.count *
+				sizeof(*hw->blk[block_id].prof.t);
+			break;
+		case ICE_SID_PROFID_REDIR_SW:
+		case ICE_SID_PROFID_REDIR_FD:
+		case ICE_SID_PROFID_REDIR_RSS:
+		case ICE_SID_PROFID_REDIR_ACL:
+		case ICE_SID_PROFID_REDIR_PE:
+			pr = (struct ice_prof_redir_section *)sect;
+			src = pr->redir_value;
+			sect_len = le16_to_cpu(pr->count) *
+				sizeof(*hw->blk[block_id].prof_redir.t);
+			dst = hw->blk[block_id].prof_redir.t;
+			dst_len = hw->blk[block_id].prof_redir.count *
+				sizeof(*hw->blk[block_id].prof_redir.t);
+			break;
+		case ICE_SID_FLD_VEC_SW:
+		case ICE_SID_FLD_VEC_FD:
+		case ICE_SID_FLD_VEC_RSS:
+		case ICE_SID_FLD_VEC_ACL:
+		case ICE_SID_FLD_VEC_PE:
+			es = (struct ice_sw_fv_section *)sect;
+			src = (u8 *)es->fv;
+			sect_len = (u32)(le16_to_cpu(es->count) *
+					 hw->blk[block_id].es.fvw) *
+				sizeof(*hw->blk[block_id].es.t);
+			dst = (u8 *)hw->blk[block_id].es.t;
+			dst_len = (u32)(hw->blk[block_id].es.count *
+					hw->blk[block_id].es.fvw) *
+				sizeof(*hw->blk[block_id].es.t);
+			break;
+		default:
+			return;
+		}
+
+		/* if the section offset exceeds destination length, terminate
+		 * table fill.
+		 */
+		if (offset > dst_len)
+			return;
+
+		/* if the sum of section size and offset exceed destination size
+		 * then we are out of bounds of the HW table size for that PF.
+		 * Changing section length to fill the remaining table space
+		 * of that PF.
+		 */
+		if ((offset + sect_len) > dst_len)
+			sect_len = dst_len - offset;
+
+		memcpy(dst + offset, src, sect_len);
+		offset += sect_len;
+		sect = ice_pkg_enum_section(NULL, &state, sid);
+	}
+}
+
+/**
+ * ice_fill_blk_tbls - Read package context for tables
+ * @hw: pointer to the hardware structure
+ *
+ * Reads the current package contents and populates the driver
+ * database with the data iteratively for all advanced feature
+ * blocks. Assume that the HW tables have been allocated.
+ */
+void ice_fill_blk_tbls(struct ice_hw *hw)
+{
+	u8 i;
+
+	for (i = 0; i < ICE_BLK_COUNT; i++) {
+		enum ice_block blk_id = (enum ice_block)i;
+
+		ice_fill_tbl(hw, blk_id, hw->blk[blk_id].xlt1.sid);
+		ice_fill_tbl(hw, blk_id, hw->blk[blk_id].xlt2.sid);
+		ice_fill_tbl(hw, blk_id, hw->blk[blk_id].prof.sid);
+		ice_fill_tbl(hw, blk_id, hw->blk[blk_id].prof_redir.sid);
+		ice_fill_tbl(hw, blk_id, hw->blk[blk_id].es.sid);
+	}
+
+	ice_init_sw_db(hw);
+}
+
+/**
+ * ice_free_prof_map - free profile map
+ * @hw: pointer to the hardware structure
+ * @blk_idx: HW block index
+ */
+static void ice_free_prof_map(struct ice_hw *hw, u8 blk_idx)
+{
+	struct ice_es *es = &hw->blk[blk_idx].es;
+	struct ice_prof_map *del, *tmp;
+
+	mutex_lock(&es->prof_map_lock);
+	list_for_each_entry_safe(del, tmp, &es->prof_map, list) {
+		list_del(&del->list);
+		devm_kfree(ice_hw_to_dev(hw), del);
+	}
+	INIT_LIST_HEAD(&es->prof_map);
+	mutex_unlock(&es->prof_map_lock);
+}
+
+/**
+ * ice_free_flow_profs - free flow profile entries
+ * @hw: pointer to the hardware structure
+ * @blk_idx: HW block index
+ */
+static void ice_free_flow_profs(struct ice_hw *hw, u8 blk_idx)
+{
+	struct ice_flow_prof *p, *tmp;
+
+	mutex_lock(&hw->fl_profs_locks[blk_idx]);
+	list_for_each_entry_safe(p, tmp, &hw->fl_profs[blk_idx], l_entry) {
+		struct ice_flow_entry *e, *t;
+
+		list_for_each_entry_safe(e, t, &p->entries, l_entry)
+			ice_flow_rem_entry(hw, (enum ice_block)blk_idx,
+					   ICE_FLOW_ENTRY_HNDL(e));
+
+		list_del(&p->l_entry);
+
+		mutex_destroy(&p->entries_lock);
+		devm_kfree(ice_hw_to_dev(hw), p);
+	}
+	mutex_unlock(&hw->fl_profs_locks[blk_idx]);
+
+	/* if driver is in reset and tables are being cleared
+	 * re-initialize the flow profile list heads
+	 */
+	INIT_LIST_HEAD(&hw->fl_profs[blk_idx]);
+}
+
+/**
+ * ice_free_vsig_tbl - free complete VSIG table entries
+ * @hw: pointer to the hardware structure
+ * @blk: the HW block on which to free the VSIG table entries
+ */
+static void ice_free_vsig_tbl(struct ice_hw *hw, enum ice_block blk)
+{
+	u16 i;
+
+	if (!hw->blk[blk].xlt2.vsig_tbl)
+		return;
+
+	for (i = 1; i < ICE_MAX_VSIGS; i++)
+		if (hw->blk[blk].xlt2.vsig_tbl[i].in_use)
+			ice_vsig_free(hw, blk, i);
+}
+
+/**
+ * ice_free_hw_tbls - free hardware table memory
+ * @hw: pointer to the hardware structure
+ */
+void ice_free_hw_tbls(struct ice_hw *hw)
+{
+	struct ice_rss_cfg *r, *rt;
+	u8 i;
+
+	for (i = 0; i < ICE_BLK_COUNT; i++) {
+		if (hw->blk[i].is_list_init) {
+			struct ice_es *es = &hw->blk[i].es;
+
+			ice_free_prof_map(hw, i);
+			mutex_destroy(&es->prof_map_lock);
+
+			ice_free_flow_profs(hw, i);
+			mutex_destroy(&hw->fl_profs_locks[i]);
+
+			hw->blk[i].is_list_init = false;
+		}
+		ice_free_vsig_tbl(hw, (enum ice_block)i);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.ptypes);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.ptg_tbl);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.t);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt2.t);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt2.vsig_tbl);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt2.vsis);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].prof.t);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].prof_redir.t);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.t);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.ref_count);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.written);
+	}
+
+	list_for_each_entry_safe(r, rt, &hw->rss_list_head, l_entry) {
+		list_del(&r->l_entry);
+		devm_kfree(ice_hw_to_dev(hw), r);
+	}
+	mutex_destroy(&hw->rss_locks);
+	memset(hw->blk, 0, sizeof(hw->blk));
+}
+
+/**
+ * ice_init_flow_profs - init flow profile locks and list heads
+ * @hw: pointer to the hardware structure
+ * @blk_idx: HW block index
+ */
+static void ice_init_flow_profs(struct ice_hw *hw, u8 blk_idx)
+{
+	mutex_init(&hw->fl_profs_locks[blk_idx]);
+	INIT_LIST_HEAD(&hw->fl_profs[blk_idx]);
+}
+
+/**
+ * ice_clear_hw_tbls - clear HW tables and flow profiles
+ * @hw: pointer to the hardware structure
+ */
+void ice_clear_hw_tbls(struct ice_hw *hw)
+{
+	u8 i;
+
+	for (i = 0; i < ICE_BLK_COUNT; i++) {
+		struct ice_prof_redir *prof_redir = &hw->blk[i].prof_redir;
+		struct ice_prof_tcam *prof = &hw->blk[i].prof;
+		struct ice_xlt1 *xlt1 = &hw->blk[i].xlt1;
+		struct ice_xlt2 *xlt2 = &hw->blk[i].xlt2;
+		struct ice_es *es = &hw->blk[i].es;
+
+		if (hw->blk[i].is_list_init) {
+			ice_free_prof_map(hw, i);
+			ice_free_flow_profs(hw, i);
+		}
+
+		ice_free_vsig_tbl(hw, (enum ice_block)i);
+
+		memset(xlt1->ptypes, 0, xlt1->count * sizeof(*xlt1->ptypes));
+		memset(xlt1->ptg_tbl, 0,
+		       ICE_MAX_PTGS * sizeof(*xlt1->ptg_tbl));
+		memset(xlt1->t, 0, xlt1->count * sizeof(*xlt1->t));
+
+		memset(xlt2->vsis, 0, xlt2->count * sizeof(*xlt2->vsis));
+		memset(xlt2->vsig_tbl, 0,
+		       xlt2->count * sizeof(*xlt2->vsig_tbl));
+		memset(xlt2->t, 0, xlt2->count * sizeof(*xlt2->t));
+
+		memset(prof->t, 0, prof->count * sizeof(*prof->t));
+		memset(prof_redir->t, 0,
+		       prof_redir->count * sizeof(*prof_redir->t));
+
+		memset(es->t, 0, es->count * sizeof(*es->t) * es->fvw);
+		memset(es->ref_count, 0, es->count * sizeof(*es->ref_count));
+		memset(es->written, 0, es->count * sizeof(*es->written));
+	}
+}
+
+/**
+ * ice_init_hw_tbls - init hardware table memory
+ * @hw: pointer to the hardware structure
+ */
+enum ice_status ice_init_hw_tbls(struct ice_hw *hw)
+{
+	u8 i;
+
+	mutex_init(&hw->rss_locks);
+	INIT_LIST_HEAD(&hw->rss_list_head);
+	for (i = 0; i < ICE_BLK_COUNT; i++) {
+		struct ice_prof_redir *prof_redir = &hw->blk[i].prof_redir;
+		struct ice_prof_tcam *prof = &hw->blk[i].prof;
+		struct ice_xlt1 *xlt1 = &hw->blk[i].xlt1;
+		struct ice_xlt2 *xlt2 = &hw->blk[i].xlt2;
+		struct ice_es *es = &hw->blk[i].es;
+		u16 j;
+
+		if (hw->blk[i].is_list_init)
+			continue;
+
+		ice_init_flow_profs(hw, i);
+		mutex_init(&es->prof_map_lock);
+		INIT_LIST_HEAD(&es->prof_map);
+		hw->blk[i].is_list_init = true;
+
+		hw->blk[i].overwrite = blk_sizes[i].overwrite;
+		es->reverse = blk_sizes[i].reverse;
+
+		xlt1->sid = ice_blk_sids[i][ICE_SID_XLT1_OFF];
+		xlt1->count = blk_sizes[i].xlt1;
+
+		xlt1->ptypes = devm_kcalloc(ice_hw_to_dev(hw), xlt1->count,
+					    sizeof(*xlt1->ptypes), GFP_KERNEL);
+
+		if (!xlt1->ptypes)
+			goto err;
+
+		xlt1->ptg_tbl = devm_kcalloc(ice_hw_to_dev(hw), ICE_MAX_PTGS,
+					     sizeof(*xlt1->ptg_tbl),
+					     GFP_KERNEL);
+
+		if (!xlt1->ptg_tbl)
+			goto err;
+
+		xlt1->t = devm_kcalloc(ice_hw_to_dev(hw), xlt1->count,
+				       sizeof(*xlt1->t), GFP_KERNEL);
+		if (!xlt1->t)
+			goto err;
+
+		xlt2->sid = ice_blk_sids[i][ICE_SID_XLT2_OFF];
+		xlt2->count = blk_sizes[i].xlt2;
+
+		xlt2->vsis = devm_kcalloc(ice_hw_to_dev(hw), xlt2->count,
+					  sizeof(*xlt2->vsis), GFP_KERNEL);
+
+		if (!xlt2->vsis)
+			goto err;
+
+		xlt2->vsig_tbl = devm_kcalloc(ice_hw_to_dev(hw), xlt2->count,
+					      sizeof(*xlt2->vsig_tbl),
+					      GFP_KERNEL);
+		if (!xlt2->vsig_tbl)
+			goto err;
+
+		for (j = 0; j < xlt2->count; j++)
+			INIT_LIST_HEAD(&xlt2->vsig_tbl[j].prop_lst);
+
+		xlt2->t = devm_kcalloc(ice_hw_to_dev(hw), xlt2->count,
+				       sizeof(*xlt2->t), GFP_KERNEL);
+		if (!xlt2->t)
+			goto err;
+
+		prof->sid = ice_blk_sids[i][ICE_SID_PR_OFF];
+		prof->count = blk_sizes[i].prof_tcam;
+		prof->max_prof_id = blk_sizes[i].prof_id;
+		prof->cdid_bits = blk_sizes[i].prof_cdid_bits;
+		prof->t = devm_kcalloc(ice_hw_to_dev(hw), prof->count,
+				       sizeof(*prof->t), GFP_KERNEL);
+
+		if (!prof->t)
+			goto err;
+
+		prof_redir->sid = ice_blk_sids[i][ICE_SID_PR_REDIR_OFF];
+		prof_redir->count = blk_sizes[i].prof_redir;
+		prof_redir->t = devm_kcalloc(ice_hw_to_dev(hw),
+					     prof_redir->count,
+					     sizeof(*prof_redir->t),
+					     GFP_KERNEL);
+
+		if (!prof_redir->t)
+			goto err;
+
+		es->sid = ice_blk_sids[i][ICE_SID_ES_OFF];
+		es->count = blk_sizes[i].es;
+		es->fvw = blk_sizes[i].fvw;
+		es->t = devm_kcalloc(ice_hw_to_dev(hw),
+				     (u32)(es->count * es->fvw),
+				     sizeof(*es->t), GFP_KERNEL);
+		if (!es->t)
+			goto err;
+
+		es->ref_count = devm_kcalloc(ice_hw_to_dev(hw), es->count,
+					     sizeof(*es->ref_count),
+					     GFP_KERNEL);
+		if (!es->ref_count)
+			goto err;
+
+		es->written = devm_kcalloc(ice_hw_to_dev(hw), es->count,
+					   sizeof(*es->written), GFP_KERNEL);
+		if (!es->written)
+			goto err;
+	}
+	return 0;
+
+err:
+	ice_free_hw_tbls(hw);
+	return ICE_ERR_NO_MEMORY;
+}
+
+/**
+ * ice_prof_gen_key - generate profile ID key
+ * @hw: pointer to the HW struct
+ * @blk: the block in which to write profile ID to
+ * @ptg: packet type group (PTG) portion of key
+ * @vsig: VSIG portion of key
+ * @cdid: CDID portion of key
+ * @flags: flag portion of key
+ * @vl_msk: valid mask
+ * @dc_msk: don't care mask
+ * @nm_msk: never match mask
+ * @key: output of profile ID key
+ */
+static enum ice_status
+ice_prof_gen_key(struct ice_hw *hw, enum ice_block blk, u8 ptg, u16 vsig,
+		 u8 cdid, u16 flags, u8 vl_msk[ICE_TCAM_KEY_VAL_SZ],
+		 u8 dc_msk[ICE_TCAM_KEY_VAL_SZ], u8 nm_msk[ICE_TCAM_KEY_VAL_SZ],
+		 u8 key[ICE_TCAM_KEY_SZ])
+{
+	struct ice_prof_id_key inkey;
+
+	inkey.xlt1 = ptg;
+	inkey.xlt2_cdid = cpu_to_le16(vsig);
+	inkey.flags = cpu_to_le16(flags);
+
+	switch (hw->blk[blk].prof.cdid_bits) {
+	case 0:
+		break;
+	case 2:
+#define ICE_CD_2_M 0xC000U
+#define ICE_CD_2_S 14
+		inkey.xlt2_cdid &= ~cpu_to_le16(ICE_CD_2_M);
+		inkey.xlt2_cdid |= cpu_to_le16(BIT(cdid) << ICE_CD_2_S);
+		break;
+	case 4:
+#define ICE_CD_4_M 0xF000U
+#define ICE_CD_4_S 12
+		inkey.xlt2_cdid &= ~cpu_to_le16(ICE_CD_4_M);
+		inkey.xlt2_cdid |= cpu_to_le16(BIT(cdid) << ICE_CD_4_S);
+		break;
+	case 8:
+#define ICE_CD_8_M 0xFF00U
+#define ICE_CD_8_S 16
+		inkey.xlt2_cdid &= ~cpu_to_le16(ICE_CD_8_M);
+		inkey.xlt2_cdid |= cpu_to_le16(BIT(cdid) << ICE_CD_8_S);
+		break;
+	default:
+		ice_debug(hw, ICE_DBG_PKG, "Error in profile config\n");
+		break;
+	}
+
+	return ice_set_key(key, ICE_TCAM_KEY_SZ, (u8 *)&inkey, vl_msk, dc_msk,
+			   nm_msk, 0, ICE_TCAM_KEY_SZ / 2);
+}
+
+/**
+ * ice_tcam_write_entry - write TCAM entry
+ * @hw: pointer to the HW struct
+ * @blk: the block in which to write profile ID to
+ * @idx: the entry index to write to
+ * @prof_id: profile ID
+ * @ptg: packet type group (PTG) portion of key
+ * @vsig: VSIG portion of key
+ * @cdid: CDID portion of key
+ * @flags: flag portion of key
+ * @vl_msk: valid mask
+ * @dc_msk: don't care mask
+ * @nm_msk: never match mask
+ */
+static enum ice_status
+ice_tcam_write_entry(struct ice_hw *hw, enum ice_block blk, u16 idx,
+		     u8 prof_id, u8 ptg, u16 vsig, u8 cdid, u16 flags,
+		     u8 vl_msk[ICE_TCAM_KEY_VAL_SZ],
+		     u8 dc_msk[ICE_TCAM_KEY_VAL_SZ],
+		     u8 nm_msk[ICE_TCAM_KEY_VAL_SZ])
+{
+	struct ice_prof_tcam_entry;
+	enum ice_status status;
+
+	status = ice_prof_gen_key(hw, blk, ptg, vsig, cdid, flags, vl_msk,
+				  dc_msk, nm_msk, hw->blk[blk].prof.t[idx].key);
+	if (!status) {
+		hw->blk[blk].prof.t[idx].addr = cpu_to_le16(idx);
+		hw->blk[blk].prof.t[idx].prof_id = prof_id;
+	}
+
+	return status;
+}
+
+/**
+ * ice_vsig_get_ref - returns number of VSIs belong to a VSIG
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsig: VSIG to query
+ * @refs: pointer to variable to receive the reference count
+ */
+static enum ice_status
+ice_vsig_get_ref(struct ice_hw *hw, enum ice_block blk, u16 vsig, u16 *refs)
+{
+	u16 idx = vsig & ICE_VSIG_IDX_M;
+	struct ice_vsig_vsi *ptr;
+
+	*refs = 0;
+
+	if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use)
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	ptr = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi;
+	while (ptr) {
+		(*refs)++;
+		ptr = ptr->next_vsi;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_has_prof_vsig - check to see if VSIG has a specific profile
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsig: VSIG to check against
+ * @hdl: profile handle
+ */
+static bool
+ice_has_prof_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl)
+{
+	u16 idx = vsig & ICE_VSIG_IDX_M;
+	struct ice_vsig_prof *ent;
+
+	list_for_each_entry(ent, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+			    list)
+		if (ent->profile_cookie == hdl)
+			return true;
+
+	ice_debug(hw, ICE_DBG_INIT,
+		  "Characteristic list for VSI group %d not found.\n",
+		  vsig);
+	return false;
+}
+
+/**
+ * ice_prof_bld_es - build profile ID extraction sequence changes
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @bld: the update package buffer build to add to
+ * @chgs: the list of changes to make in hardware
+ */
+static enum ice_status
+ice_prof_bld_es(struct ice_hw *hw, enum ice_block blk,
+		struct ice_buf_build *bld, struct list_head *chgs)
+{
+	u16 vec_size = hw->blk[blk].es.fvw * sizeof(struct ice_fv_word);
+	struct ice_chs_chg *tmp;
+
+	list_for_each_entry(tmp, chgs, list_entry)
+		if (tmp->type == ICE_PTG_ES_ADD && tmp->add_prof) {
+			u16 off = tmp->prof_id * hw->blk[blk].es.fvw;
+			struct ice_pkg_es *p;
+			u32 id;
+
+			id = ice_sect_id(blk, ICE_VEC_TBL);
+			p = ice_pkg_buf_alloc_section(bld, id,
+						      struct_size(p, es, 1) +
+						      vec_size -
+						      sizeof(p->es[0]));
+
+			if (!p)
+				return ICE_ERR_MAX_LIMIT;
+
+			p->count = cpu_to_le16(1);
+			p->offset = cpu_to_le16(tmp->prof_id);
+
+			memcpy(p->es, &hw->blk[blk].es.t[off], vec_size);
+		}
+
+	return 0;
+}
+
+/**
+ * ice_prof_bld_tcam - build profile ID TCAM changes
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @bld: the update package buffer build to add to
+ * @chgs: the list of changes to make in hardware
+ */
+static enum ice_status
+ice_prof_bld_tcam(struct ice_hw *hw, enum ice_block blk,
+		  struct ice_buf_build *bld, struct list_head *chgs)
+{
+	struct ice_chs_chg *tmp;
+
+	list_for_each_entry(tmp, chgs, list_entry)
+		if (tmp->type == ICE_TCAM_ADD && tmp->add_tcam_idx) {
+			struct ice_prof_id_section *p;
+			u32 id;
+
+			id = ice_sect_id(blk, ICE_PROF_TCAM);
+			p = ice_pkg_buf_alloc_section(bld, id,
+						      struct_size(p, entry, 1));
+
+			if (!p)
+				return ICE_ERR_MAX_LIMIT;
+
+			p->count = cpu_to_le16(1);
+			p->entry[0].addr = cpu_to_le16(tmp->tcam_idx);
+			p->entry[0].prof_id = tmp->prof_id;
+
+			memcpy(p->entry[0].key,
+			       &hw->blk[blk].prof.t[tmp->tcam_idx].key,
+			       sizeof(hw->blk[blk].prof.t->key));
+		}
+
+	return 0;
+}
+
+/**
+ * ice_prof_bld_xlt1 - build XLT1 changes
+ * @blk: hardware block
+ * @bld: the update package buffer build to add to
+ * @chgs: the list of changes to make in hardware
+ */
+static enum ice_status
+ice_prof_bld_xlt1(enum ice_block blk, struct ice_buf_build *bld,
+		  struct list_head *chgs)
+{
+	struct ice_chs_chg *tmp;
+
+	list_for_each_entry(tmp, chgs, list_entry)
+		if (tmp->type == ICE_PTG_ES_ADD && tmp->add_ptg) {
+			struct ice_xlt1_section *p;
+			u32 id;
+
+			id = ice_sect_id(blk, ICE_XLT1);
+			p = ice_pkg_buf_alloc_section(bld, id,
+						      struct_size(p, value, 1));
+
+			if (!p)
+				return ICE_ERR_MAX_LIMIT;
+
+			p->count = cpu_to_le16(1);
+			p->offset = cpu_to_le16(tmp->ptype);
+			p->value[0] = tmp->ptg;
+		}
+
+	return 0;
+}
+
+/**
+ * ice_prof_bld_xlt2 - build XLT2 changes
+ * @blk: hardware block
+ * @bld: the update package buffer build to add to
+ * @chgs: the list of changes to make in hardware
+ */
+static enum ice_status
+ice_prof_bld_xlt2(enum ice_block blk, struct ice_buf_build *bld,
+		  struct list_head *chgs)
+{
+	struct ice_chs_chg *tmp;
+
+	list_for_each_entry(tmp, chgs, list_entry) {
+		struct ice_xlt2_section *p;
+		u32 id;
+
+		switch (tmp->type) {
+		case ICE_VSIG_ADD:
+		case ICE_VSI_MOVE:
+		case ICE_VSIG_REM:
+			id = ice_sect_id(blk, ICE_XLT2);
+			p = ice_pkg_buf_alloc_section(bld, id,
+						      struct_size(p, value, 1));
+
+			if (!p)
+				return ICE_ERR_MAX_LIMIT;
+
+			p->count = cpu_to_le16(1);
+			p->offset = cpu_to_le16(tmp->vsi);
+			p->value[0] = cpu_to_le16(tmp->vsig);
+			break;
+		default:
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_upd_prof_hw - update hardware using the change list
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @chgs: the list of changes to make in hardware
+ */
+static enum ice_status
+ice_upd_prof_hw(struct ice_hw *hw, enum ice_block blk,
+		struct list_head *chgs)
+{
+	struct ice_buf_build *b;
+	struct ice_chs_chg *tmp;
+	enum ice_status status;
+	u16 pkg_sects;
+	u16 xlt1 = 0;
+	u16 xlt2 = 0;
+	u16 tcam = 0;
+	u16 es = 0;
+	u16 sects;
+
+	/* count number of sections we need */
+	list_for_each_entry(tmp, chgs, list_entry) {
+		switch (tmp->type) {
+		case ICE_PTG_ES_ADD:
+			if (tmp->add_ptg)
+				xlt1++;
+			if (tmp->add_prof)
+				es++;
+			break;
+		case ICE_TCAM_ADD:
+			tcam++;
+			break;
+		case ICE_VSIG_ADD:
+		case ICE_VSI_MOVE:
+		case ICE_VSIG_REM:
+			xlt2++;
+			break;
+		default:
+			break;
+		}
+	}
+	sects = xlt1 + xlt2 + tcam + es;
+
+	if (!sects)
+		return 0;
+
+	/* Build update package buffer */
+	b = ice_pkg_buf_alloc(hw);
+	if (!b)
+		return ICE_ERR_NO_MEMORY;
+
+	status = ice_pkg_buf_reserve_section(b, sects);
+	if (status)
+		goto error_tmp;
+
+	/* Preserve order of table update: ES, TCAM, PTG, VSIG */
+	if (es) {
+		status = ice_prof_bld_es(hw, blk, b, chgs);
+		if (status)
+			goto error_tmp;
+	}
+
+	if (tcam) {
+		status = ice_prof_bld_tcam(hw, blk, b, chgs);
+		if (status)
+			goto error_tmp;
+	}
+
+	if (xlt1) {
+		status = ice_prof_bld_xlt1(blk, b, chgs);
+		if (status)
+			goto error_tmp;
+	}
+
+	if (xlt2) {
+		status = ice_prof_bld_xlt2(blk, b, chgs);
+		if (status)
+			goto error_tmp;
+	}
+
+	/* After package buffer build check if the section count in buffer is
+	 * non-zero and matches the number of sections detected for package
+	 * update.
+	 */
+	pkg_sects = ice_pkg_buf_get_active_sections(b);
+	if (!pkg_sects || pkg_sects != sects) {
+		status = ICE_ERR_INVAL_SIZE;
+		goto error_tmp;
+	}
+
+	/* update package */
+	status = ice_update_pkg(hw, ice_pkg_buf(b), 1);
+	if (status == ICE_ERR_AQ_ERROR)
+		ice_debug(hw, ICE_DBG_INIT, "Unable to update HW profile\n");
+
+error_tmp:
+	ice_pkg_buf_free(hw, b);
+	return status;
+}
+
+/**
+ * ice_update_fd_mask - set Flow Director Field Vector mask for a profile
+ * @hw: pointer to the HW struct
+ * @prof_id: profile ID
+ * @mask_sel: mask select
+ *
+ * This function enable any of the masks selected by the mask select parameter
+ * for the profile specified.
+ */
+static void ice_update_fd_mask(struct ice_hw *hw, u16 prof_id, u32 mask_sel)
+{
+	wr32(hw, GLQF_FDMASK_SEL(prof_id), mask_sel);
+
+	ice_debug(hw, ICE_DBG_INIT, "fd mask(%d): %x = %x\n", prof_id,
+		  GLQF_FDMASK_SEL(prof_id), mask_sel);
+}
+
+struct ice_fd_src_dst_pair {
+	u8 prot_id;
+	u8 count;
+	u16 off;
+};
+
+static const struct ice_fd_src_dst_pair ice_fd_pairs[] = {
+	/* These are defined in pairs */
+	{ ICE_PROT_IPV4_OF_OR_S, 2, 12 },
+	{ ICE_PROT_IPV4_OF_OR_S, 2, 16 },
+
+	{ ICE_PROT_IPV4_IL, 2, 12 },
+	{ ICE_PROT_IPV4_IL, 2, 16 },
+
+	{ ICE_PROT_IPV6_OF_OR_S, 8, 8 },
+	{ ICE_PROT_IPV6_OF_OR_S, 8, 24 },
+
+	{ ICE_PROT_IPV6_IL, 8, 8 },
+	{ ICE_PROT_IPV6_IL, 8, 24 },
+
+	{ ICE_PROT_TCP_IL, 1, 0 },
+	{ ICE_PROT_TCP_IL, 1, 2 },
+
+	{ ICE_PROT_UDP_OF, 1, 0 },
+	{ ICE_PROT_UDP_OF, 1, 2 },
+
+	{ ICE_PROT_UDP_IL_OR_S, 1, 0 },
+	{ ICE_PROT_UDP_IL_OR_S, 1, 2 },
+
+	{ ICE_PROT_SCTP_IL, 1, 0 },
+	{ ICE_PROT_SCTP_IL, 1, 2 }
+};
+
+#define ICE_FD_SRC_DST_PAIR_COUNT	ARRAY_SIZE(ice_fd_pairs)
+
+/**
+ * ice_update_fd_swap - set register appropriately for a FD FV extraction
+ * @hw: pointer to the HW struct
+ * @prof_id: profile ID
+ * @es: extraction sequence (length of array is determined by the block)
+ */
+static enum ice_status
+ice_update_fd_swap(struct ice_hw *hw, u16 prof_id, struct ice_fv_word *es)
+{
+	DECLARE_BITMAP(pair_list, ICE_FD_SRC_DST_PAIR_COUNT);
+	u8 pair_start[ICE_FD_SRC_DST_PAIR_COUNT] = { 0 };
+#define ICE_FD_FV_NOT_FOUND (-2)
+	s8 first_free = ICE_FD_FV_NOT_FOUND;
+	u8 used[ICE_MAX_FV_WORDS] = { 0 };
+	s8 orig_free, si;
+	u32 mask_sel = 0;
+	u8 i, j, k;
+
+	bitmap_zero(pair_list, ICE_FD_SRC_DST_PAIR_COUNT);
+
+	/* This code assumes that the Flow Director field vectors are assigned
+	 * from the end of the FV indexes working towards the zero index, that
+	 * only complete fields will be included and will be consecutive, and
+	 * that there are no gaps between valid indexes.
+	 */
+
+	/* Determine swap fields present */
+	for (i = 0; i < hw->blk[ICE_BLK_FD].es.fvw; i++) {
+		/* Find the first free entry, assuming right to left population.
+		 * This is where we can start adding additional pairs if needed.
+		 */
+		if (first_free == ICE_FD_FV_NOT_FOUND && es[i].prot_id !=
+		    ICE_PROT_INVALID)
+			first_free = i - 1;
+
+		for (j = 0; j < ICE_FD_SRC_DST_PAIR_COUNT; j++)
+			if (es[i].prot_id == ice_fd_pairs[j].prot_id &&
+			    es[i].off == ice_fd_pairs[j].off) {
+				set_bit(j, pair_list);
+				pair_start[j] = i;
+			}
+	}
+
+	orig_free = first_free;
+
+	/* determine missing swap fields that need to be added */
+	for (i = 0; i < ICE_FD_SRC_DST_PAIR_COUNT; i += 2) {
+		u8 bit1 = test_bit(i + 1, pair_list);
+		u8 bit0 = test_bit(i, pair_list);
+
+		if (bit0 ^ bit1) {
+			u8 index;
+
+			/* add the appropriate 'paired' entry */
+			if (!bit0)
+				index = i;
+			else
+				index = i + 1;
+
+			/* check for room */
+			if (first_free + 1 < (s8)ice_fd_pairs[index].count)
+				return ICE_ERR_MAX_LIMIT;
+
+			/* place in extraction sequence */
+			for (k = 0; k < ice_fd_pairs[index].count; k++) {
+				es[first_free - k].prot_id =
+					ice_fd_pairs[index].prot_id;
+				es[first_free - k].off =
+					ice_fd_pairs[index].off + (k * 2);
+
+				if (k > first_free)
+					return ICE_ERR_OUT_OF_RANGE;
+
+				/* keep track of non-relevant fields */
+				mask_sel |= BIT(first_free - k);
+			}
+
+			pair_start[index] = first_free;
+			first_free -= ice_fd_pairs[index].count;
+		}
+	}
+
+	/* fill in the swap array */
+	si = hw->blk[ICE_BLK_FD].es.fvw - 1;
+	while (si >= 0) {
+		u8 indexes_used = 1;
+
+		/* assume flat at this index */
+#define ICE_SWAP_VALID	0x80
+		used[si] = si | ICE_SWAP_VALID;
+
+		if (orig_free == ICE_FD_FV_NOT_FOUND || si <= orig_free) {
+			si -= indexes_used;
+			continue;
+		}
+
+		/* check for a swap location */
+		for (j = 0; j < ICE_FD_SRC_DST_PAIR_COUNT; j++)
+			if (es[si].prot_id == ice_fd_pairs[j].prot_id &&
+			    es[si].off == ice_fd_pairs[j].off) {
+				u8 idx;
+
+				/* determine the appropriate matching field */
+				idx = j + ((j % 2) ? -1 : 1);
+
+				indexes_used = ice_fd_pairs[idx].count;
+				for (k = 0; k < indexes_used; k++) {
+					used[si - k] = (pair_start[idx] - k) |
+						ICE_SWAP_VALID;
+				}
+
+				break;
+			}
+
+		si -= indexes_used;
+	}
+
+	/* for each set of 4 swap and 4 inset indexes, write the appropriate
+	 * register
+	 */
+	for (j = 0; j < hw->blk[ICE_BLK_FD].es.fvw / 4; j++) {
+		u32 raw_swap = 0;
+		u32 raw_in = 0;
+
+		for (k = 0; k < 4; k++) {
+			u8 idx;
+
+			idx = (j * 4) + k;
+			if (used[idx] && !(mask_sel & BIT(idx))) {
+				raw_swap |= used[idx] << (k * BITS_PER_BYTE);
+#define ICE_INSET_DFLT 0x9f
+				raw_in |= ICE_INSET_DFLT << (k * BITS_PER_BYTE);
+			}
+		}
+
+		/* write the appropriate swap register set */
+		wr32(hw, GLQF_FDSWAP(prof_id, j), raw_swap);
+
+		ice_debug(hw, ICE_DBG_INIT, "swap wr(%d, %d): %x = %08x\n",
+			  prof_id, j, GLQF_FDSWAP(prof_id, j), raw_swap);
+
+		/* write the appropriate inset register set */
+		wr32(hw, GLQF_FDINSET(prof_id, j), raw_in);
+
+		ice_debug(hw, ICE_DBG_INIT, "inset wr(%d, %d): %x = %08x\n",
+			  prof_id, j, GLQF_FDINSET(prof_id, j), raw_in);
+	}
+
+	/* initially clear the mask select for this profile */
+	ice_update_fd_mask(hw, prof_id, 0);
+
+	return 0;
+}
+
+/**
+ * ice_add_prof - add profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @id: profile tracking ID
+ * @ptypes: array of bitmaps indicating ptypes (ICE_FLOW_PTYPE_MAX bits)
+ * @es: extraction sequence (length of array is determined by the block)
+ *
+ * This function registers a profile, which matches a set of PTGs with a
+ * particular extraction sequence. While the hardware profile is allocated
+ * it will not be written until the first call to ice_add_flow that specifies
+ * the ID value used here.
+ */
+enum ice_status
+ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
+	     struct ice_fv_word *es)
+{
+	u32 bytes = DIV_ROUND_UP(ICE_FLOW_PTYPE_MAX, BITS_PER_BYTE);
+	DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT);
+	struct ice_prof_map *prof;
+	enum ice_status status;
+	u8 byte = 0;
+	u8 prof_id;
+
+	bitmap_zero(ptgs_used, ICE_XLT1_CNT);
+
+	mutex_lock(&hw->blk[blk].es.prof_map_lock);
+
+	/* search for existing profile */
+	status = ice_find_prof_id(hw, blk, es, &prof_id);
+	if (status) {
+		/* allocate profile ID */
+		status = ice_alloc_prof_id(hw, blk, &prof_id);
+		if (status)
+			goto err_ice_add_prof;
+		if (blk == ICE_BLK_FD) {
+			/* For Flow Director block, the extraction sequence may
+			 * need to be altered in the case where there are paired
+			 * fields that have no match. This is necessary because
+			 * for Flow Director, src and dest fields need to paired
+			 * for filter programming and these values are swapped
+			 * during Tx.
+			 */
+			status = ice_update_fd_swap(hw, prof_id, es);
+			if (status)
+				goto err_ice_add_prof;
+		}
+
+		/* and write new es */
+		ice_write_es(hw, blk, prof_id, es);
+	}
+
+	ice_prof_inc_ref(hw, blk, prof_id);
+
+	/* add profile info */
+	prof = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*prof), GFP_KERNEL);
+	if (!prof) {
+		status = ICE_ERR_NO_MEMORY;
+		goto err_ice_add_prof;
+	}
+
+	prof->profile_cookie = id;
+	prof->prof_id = prof_id;
+	prof->ptg_cnt = 0;
+	prof->context = 0;
+
+	/* build list of ptgs */
+	while (bytes && prof->ptg_cnt < ICE_MAX_PTG_PER_PROFILE) {
+		u8 bit;
+
+		if (!ptypes[byte]) {
+			bytes--;
+			byte++;
+			continue;
+		}
+
+		/* Examine 8 bits per byte */
+		for_each_set_bit(bit, (unsigned long *)&ptypes[byte],
+				 BITS_PER_BYTE) {
+			u16 ptype;
+			u8 ptg;
+			u8 m;
+
+			ptype = byte * BITS_PER_BYTE + bit;
+
+			/* The package should place all ptypes in a non-zero
+			 * PTG, so the following call should never fail.
+			 */
+			if (ice_ptg_find_ptype(hw, blk, ptype, &ptg))
+				continue;
+
+			/* If PTG is already added, skip and continue */
+			if (test_bit(ptg, ptgs_used))
+				continue;
+
+			set_bit(ptg, ptgs_used);
+			prof->ptg[prof->ptg_cnt] = ptg;
+
+			if (++prof->ptg_cnt >= ICE_MAX_PTG_PER_PROFILE)
+				break;
+
+			/* nothing left in byte, then exit */
+			m = ~(u8)((1 << (bit + 1)) - 1);
+			if (!(ptypes[byte] & m))
+				break;
+		}
+
+		bytes--;
+		byte++;
+	}
+
+	list_add(&prof->list, &hw->blk[blk].es.prof_map);
+	status = 0;
+
+err_ice_add_prof:
+	mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+	return status;
+}
+
+/**
+ * ice_search_prof_id - Search for a profile tracking ID
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @id: profile tracking ID
+ *
+ * This will search for a profile tracking ID which was previously added.
+ * The profile map lock should be held before calling this function.
+ */
+static struct ice_prof_map *
+ice_search_prof_id(struct ice_hw *hw, enum ice_block blk, u64 id)
+{
+	struct ice_prof_map *entry = NULL;
+	struct ice_prof_map *map;
+
+	list_for_each_entry(map, &hw->blk[blk].es.prof_map, list)
+		if (map->profile_cookie == id) {
+			entry = map;
+			break;
+		}
+
+	return entry;
+}
+
+/**
+ * ice_vsig_prof_id_count - count profiles in a VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: VSIG to remove the profile from
+ */
+static u16
+ice_vsig_prof_id_count(struct ice_hw *hw, enum ice_block blk, u16 vsig)
+{
+	u16 idx = vsig & ICE_VSIG_IDX_M, count = 0;
+	struct ice_vsig_prof *p;
+
+	list_for_each_entry(p, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+			    list)
+		count++;
+
+	return count;
+}
+
+/**
+ * ice_rel_tcam_idx - release a TCAM index
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @idx: the index to release
+ */
+static enum ice_status
+ice_rel_tcam_idx(struct ice_hw *hw, enum ice_block blk, u16 idx)
+{
+	/* Masks to invoke a never match entry */
+	u8 vl_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+	u8 dc_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFE, 0xFF, 0xFF, 0xFF, 0xFF };
+	u8 nm_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x01, 0x00, 0x00, 0x00, 0x00 };
+	enum ice_status status;
+
+	/* write the TCAM entry */
+	status = ice_tcam_write_entry(hw, blk, idx, 0, 0, 0, 0, 0, vl_msk,
+				      dc_msk, nm_msk);
+	if (status)
+		return status;
+
+	/* release the TCAM entry */
+	status = ice_free_tcam_ent(hw, blk, idx);
+
+	return status;
+}
+
+/**
+ * ice_rem_prof_id - remove one profile from a VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @prof: pointer to profile structure to remove
+ */
+static enum ice_status
+ice_rem_prof_id(struct ice_hw *hw, enum ice_block blk,
+		struct ice_vsig_prof *prof)
+{
+	enum ice_status status;
+	u16 i;
+
+	for (i = 0; i < prof->tcam_count; i++)
+		if (prof->tcam[i].in_use) {
+			prof->tcam[i].in_use = false;
+			status = ice_rel_tcam_idx(hw, blk,
+						  prof->tcam[i].tcam_idx);
+			if (status)
+				return ICE_ERR_HW_TABLE;
+		}
+
+	return 0;
+}
+
+/**
+ * ice_rem_vsig - remove VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: the VSIG to remove
+ * @chg: the change list
+ */
+static enum ice_status
+ice_rem_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig,
+	     struct list_head *chg)
+{
+	u16 idx = vsig & ICE_VSIG_IDX_M;
+	struct ice_vsig_vsi *vsi_cur;
+	struct ice_vsig_prof *d, *t;
+	enum ice_status status;
+
+	/* remove TCAM entries */
+	list_for_each_entry_safe(d, t,
+				 &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+				 list) {
+		status = ice_rem_prof_id(hw, blk, d);
+		if (status)
+			return status;
+
+		list_del(&d->list);
+		devm_kfree(ice_hw_to_dev(hw), d);
+	}
+
+	/* Move all VSIS associated with this VSIG to the default VSIG */
+	vsi_cur = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi;
+	/* If the VSIG has at least 1 VSI then iterate through the list
+	 * and remove the VSIs before deleting the group.
+	 */
+	if (vsi_cur)
+		do {
+			struct ice_vsig_vsi *tmp = vsi_cur->next_vsi;
+			struct ice_chs_chg *p;
+
+			p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p),
+					 GFP_KERNEL);
+			if (!p)
+				return ICE_ERR_NO_MEMORY;
+
+			p->type = ICE_VSIG_REM;
+			p->orig_vsig = vsig;
+			p->vsig = ICE_DEFAULT_VSIG;
+			p->vsi = vsi_cur - hw->blk[blk].xlt2.vsis;
+
+			list_add(&p->list_entry, chg);
+
+			vsi_cur = tmp;
+		} while (vsi_cur);
+
+	return ice_vsig_free(hw, blk, vsig);
+}
+
+/**
+ * ice_rem_prof_id_vsig - remove a specific profile from a VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: VSIG to remove the profile from
+ * @hdl: profile handle indicating which profile to remove
+ * @chg: list to receive a record of changes
+ */
+static enum ice_status
+ice_rem_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl,
+		     struct list_head *chg)
+{
+	u16 idx = vsig & ICE_VSIG_IDX_M;
+	struct ice_vsig_prof *p, *t;
+	enum ice_status status;
+
+	list_for_each_entry_safe(p, t,
+				 &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+				 list)
+		if (p->profile_cookie == hdl) {
+			if (ice_vsig_prof_id_count(hw, blk, vsig) == 1)
+				/* this is the last profile, remove the VSIG */
+				return ice_rem_vsig(hw, blk, vsig, chg);
+
+			status = ice_rem_prof_id(hw, blk, p);
+			if (!status) {
+				list_del(&p->list);
+				devm_kfree(ice_hw_to_dev(hw), p);
+			}
+			return status;
+		}
+
+	return ICE_ERR_DOES_NOT_EXIST;
+}
+
+/**
+ * ice_rem_flow_all - remove all flows with a particular profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @id: profile tracking ID
+ */
+static enum ice_status
+ice_rem_flow_all(struct ice_hw *hw, enum ice_block blk, u64 id)
+{
+	struct ice_chs_chg *del, *tmp;
+	enum ice_status status;
+	struct list_head chg;
+	u16 i;
+
+	INIT_LIST_HEAD(&chg);
+
+	for (i = 1; i < ICE_MAX_VSIGS; i++)
+		if (hw->blk[blk].xlt2.vsig_tbl[i].in_use) {
+			if (ice_has_prof_vsig(hw, blk, i, id)) {
+				status = ice_rem_prof_id_vsig(hw, blk, i, id,
+							      &chg);
+				if (status)
+					goto err_ice_rem_flow_all;
+			}
+		}
+
+	status = ice_upd_prof_hw(hw, blk, &chg);
+
+err_ice_rem_flow_all:
+	list_for_each_entry_safe(del, tmp, &chg, list_entry) {
+		list_del(&del->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), del);
+	}
+
+	return status;
+}
+
+/**
+ * ice_rem_prof - remove profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @id: profile tracking ID
+ *
+ * This will remove the profile specified by the ID parameter, which was
+ * previously created through ice_add_prof. If any existing entries
+ * are associated with this profile, they will be removed as well.
+ */
+enum ice_status ice_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 id)
+{
+	struct ice_prof_map *pmap;
+	enum ice_status status;
+
+	mutex_lock(&hw->blk[blk].es.prof_map_lock);
+
+	pmap = ice_search_prof_id(hw, blk, id);
+	if (!pmap) {
+		status = ICE_ERR_DOES_NOT_EXIST;
+		goto err_ice_rem_prof;
+	}
+
+	/* remove all flows with this profile */
+	status = ice_rem_flow_all(hw, blk, pmap->profile_cookie);
+	if (status)
+		goto err_ice_rem_prof;
+
+	/* dereference profile, and possibly remove */
+	ice_prof_dec_ref(hw, blk, pmap->prof_id);
+
+	list_del(&pmap->list);
+	devm_kfree(ice_hw_to_dev(hw), pmap);
+
+err_ice_rem_prof:
+	mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+	return status;
+}
+
+/**
+ * ice_get_prof - get profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @hdl: profile handle
+ * @chg: change list
+ */
+static enum ice_status
+ice_get_prof(struct ice_hw *hw, enum ice_block blk, u64 hdl,
+	     struct list_head *chg)
+{
+	enum ice_status status = 0;
+	struct ice_prof_map *map;
+	struct ice_chs_chg *p;
+	u16 i;
+
+	mutex_lock(&hw->blk[blk].es.prof_map_lock);
+	/* Get the details on the profile specified by the handle ID */
+	map = ice_search_prof_id(hw, blk, hdl);
+	if (!map) {
+		status = ICE_ERR_DOES_NOT_EXIST;
+		goto err_ice_get_prof;
+	}
+
+	for (i = 0; i < map->ptg_cnt; i++)
+		if (!hw->blk[blk].es.written[map->prof_id]) {
+			/* add ES to change list */
+			p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p),
+					 GFP_KERNEL);
+			if (!p) {
+				status = ICE_ERR_NO_MEMORY;
+				goto err_ice_get_prof;
+			}
+
+			p->type = ICE_PTG_ES_ADD;
+			p->ptype = 0;
+			p->ptg = map->ptg[i];
+			p->add_ptg = 0;
+
+			p->add_prof = 1;
+			p->prof_id = map->prof_id;
+
+			hw->blk[blk].es.written[map->prof_id] = true;
+
+			list_add(&p->list_entry, chg);
+		}
+
+err_ice_get_prof:
+	mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+	/* let caller clean up the change list */
+	return status;
+}
+
+/**
+ * ice_get_profs_vsig - get a copy of the list of profiles from a VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: VSIG from which to copy the list
+ * @lst: output list
+ *
+ * This routine makes a copy of the list of profiles in the specified VSIG.
+ */
+static enum ice_status
+ice_get_profs_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig,
+		   struct list_head *lst)
+{
+	struct ice_vsig_prof *ent1, *ent2;
+	u16 idx = vsig & ICE_VSIG_IDX_M;
+
+	list_for_each_entry(ent1, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+			    list) {
+		struct ice_vsig_prof *p;
+
+		/* copy to the input list */
+		p = devm_kmemdup(ice_hw_to_dev(hw), ent1, sizeof(*p),
+				 GFP_KERNEL);
+		if (!p)
+			goto err_ice_get_profs_vsig;
+
+		list_add_tail(&p->list, lst);
+	}
+
+	return 0;
+
+err_ice_get_profs_vsig:
+	list_for_each_entry_safe(ent1, ent2, lst, list) {
+		list_del(&ent1->list);
+		devm_kfree(ice_hw_to_dev(hw), ent1);
+	}
+
+	return ICE_ERR_NO_MEMORY;
+}
+
+/**
+ * ice_add_prof_to_lst - add profile entry to a list
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @lst: the list to be added to
+ * @hdl: profile handle of entry to add
+ */
+static enum ice_status
+ice_add_prof_to_lst(struct ice_hw *hw, enum ice_block blk,
+		    struct list_head *lst, u64 hdl)
+{
+	enum ice_status status = 0;
+	struct ice_prof_map *map;
+	struct ice_vsig_prof *p;
+	u16 i;
+
+	mutex_lock(&hw->blk[blk].es.prof_map_lock);
+	map = ice_search_prof_id(hw, blk, hdl);
+	if (!map) {
+		status = ICE_ERR_DOES_NOT_EXIST;
+		goto err_ice_add_prof_to_lst;
+	}
+
+	p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL);
+	if (!p) {
+		status = ICE_ERR_NO_MEMORY;
+		goto err_ice_add_prof_to_lst;
+	}
+
+	p->profile_cookie = map->profile_cookie;
+	p->prof_id = map->prof_id;
+	p->tcam_count = map->ptg_cnt;
+
+	for (i = 0; i < map->ptg_cnt; i++) {
+		p->tcam[i].prof_id = map->prof_id;
+		p->tcam[i].tcam_idx = ICE_INVALID_TCAM;
+		p->tcam[i].ptg = map->ptg[i];
+	}
+
+	list_add(&p->list, lst);
+
+err_ice_add_prof_to_lst:
+	mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+	return status;
+}
+
+/**
+ * ice_move_vsi - move VSI to another VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsi: the VSI to move
+ * @vsig: the VSIG to move the VSI to
+ * @chg: the change list
+ */
+static enum ice_status
+ice_move_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig,
+	     struct list_head *chg)
+{
+	enum ice_status status;
+	struct ice_chs_chg *p;
+	u16 orig_vsig;
+
+	p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return ICE_ERR_NO_MEMORY;
+
+	status = ice_vsig_find_vsi(hw, blk, vsi, &orig_vsig);
+	if (!status)
+		status = ice_vsig_add_mv_vsi(hw, blk, vsi, vsig);
+
+	if (status) {
+		devm_kfree(ice_hw_to_dev(hw), p);
+		return status;
+	}
+
+	p->type = ICE_VSI_MOVE;
+	p->vsi = vsi;
+	p->orig_vsig = orig_vsig;
+	p->vsig = vsig;
+
+	list_add(&p->list_entry, chg);
+
+	return 0;
+}
+
+/**
+ * ice_rem_chg_tcam_ent - remove a specific TCAM entry from change list
+ * @hw: pointer to the HW struct
+ * @idx: the index of the TCAM entry to remove
+ * @chg: the list of change structures to search
+ */
+static void
+ice_rem_chg_tcam_ent(struct ice_hw *hw, u16 idx, struct list_head *chg)
+{
+	struct ice_chs_chg *pos, *tmp;
+
+	list_for_each_entry_safe(tmp, pos, chg, list_entry)
+		if (tmp->type == ICE_TCAM_ADD && tmp->tcam_idx == idx) {
+			list_del(&tmp->list_entry);
+			devm_kfree(ice_hw_to_dev(hw), tmp);
+		}
+}
+
+/**
+ * ice_prof_tcam_ena_dis - add enable or disable TCAM change
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @enable: true to enable, false to disable
+ * @vsig: the VSIG of the TCAM entry
+ * @tcam: pointer the TCAM info structure of the TCAM to disable
+ * @chg: the change list
+ *
+ * This function appends an enable or disable TCAM entry in the change log
+ */
+static enum ice_status
+ice_prof_tcam_ena_dis(struct ice_hw *hw, enum ice_block blk, bool enable,
+		      u16 vsig, struct ice_tcam_inf *tcam,
+		      struct list_head *chg)
+{
+	enum ice_status status;
+	struct ice_chs_chg *p;
+
+	u8 vl_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+	u8 dc_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0x00, 0x00, 0x00 };
+	u8 nm_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x00, 0x00, 0x00, 0x00, 0x00 };
+
+	/* if disabling, free the TCAM */
+	if (!enable) {
+		status = ice_rel_tcam_idx(hw, blk, tcam->tcam_idx);
+
+		/* if we have already created a change for this TCAM entry, then
+		 * we need to remove that entry, in order to prevent writing to
+		 * a TCAM entry we no longer will have ownership of.
+		 */
+		ice_rem_chg_tcam_ent(hw, tcam->tcam_idx, chg);
+		tcam->tcam_idx = 0;
+		tcam->in_use = 0;
+		return status;
+	}
+
+	/* for re-enabling, reallocate a TCAM */
+	status = ice_alloc_tcam_ent(hw, blk, &tcam->tcam_idx);
+	if (status)
+		return status;
+
+	/* add TCAM to change list */
+	p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return ICE_ERR_NO_MEMORY;
+
+	status = ice_tcam_write_entry(hw, blk, tcam->tcam_idx, tcam->prof_id,
+				      tcam->ptg, vsig, 0, 0, vl_msk, dc_msk,
+				      nm_msk);
+	if (status)
+		goto err_ice_prof_tcam_ena_dis;
+
+	tcam->in_use = 1;
+
+	p->type = ICE_TCAM_ADD;
+	p->add_tcam_idx = true;
+	p->prof_id = tcam->prof_id;
+	p->ptg = tcam->ptg;
+	p->vsig = 0;
+	p->tcam_idx = tcam->tcam_idx;
+
+	/* log change */
+	list_add(&p->list_entry, chg);
+
+	return 0;
+
+err_ice_prof_tcam_ena_dis:
+	devm_kfree(ice_hw_to_dev(hw), p);
+	return status;
+}
+
+/**
+ * ice_adj_prof_priorities - adjust profile based on priorities
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: the VSIG for which to adjust profile priorities
+ * @chg: the change list
+ */
+static enum ice_status
+ice_adj_prof_priorities(struct ice_hw *hw, enum ice_block blk, u16 vsig,
+			struct list_head *chg)
+{
+	DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT);
+	struct ice_vsig_prof *t;
+	enum ice_status status;
+	u16 idx;
+
+	bitmap_zero(ptgs_used, ICE_XLT1_CNT);
+	idx = vsig & ICE_VSIG_IDX_M;
+
+	/* Priority is based on the order in which the profiles are added. The
+	 * newest added profile has highest priority and the oldest added
+	 * profile has the lowest priority. Since the profile property list for
+	 * a VSIG is sorted from newest to oldest, this code traverses the list
+	 * in order and enables the first of each PTG that it finds (that is not
+	 * already enabled); it also disables any duplicate PTGs that it finds
+	 * in the older profiles (that are currently enabled).
+	 */
+
+	list_for_each_entry(t, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+			    list) {
+		u16 i;
+
+		for (i = 0; i < t->tcam_count; i++) {
+			/* Scan the priorities from newest to oldest.
+			 * Make sure that the newest profiles take priority.
+			 */
+			if (test_bit(t->tcam[i].ptg, ptgs_used) &&
+			    t->tcam[i].in_use) {
+				/* need to mark this PTG as never match, as it
+				 * was already in use and therefore duplicate
+				 * (and lower priority)
+				 */
+				status = ice_prof_tcam_ena_dis(hw, blk, false,
+							       vsig,
+							       &t->tcam[i],
+							       chg);
+				if (status)
+					return status;
+			} else if (!test_bit(t->tcam[i].ptg, ptgs_used) &&
+				   !t->tcam[i].in_use) {
+				/* need to enable this PTG, as it in not in use
+				 * and not enabled (highest priority)
+				 */
+				status = ice_prof_tcam_ena_dis(hw, blk, true,
+							       vsig,
+							       &t->tcam[i],
+							       chg);
+				if (status)
+					return status;
+			}
+
+			/* keep track of used ptgs */
+			set_bit(t->tcam[i].ptg, ptgs_used);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_add_prof_id_vsig - add profile to VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: the VSIG to which this profile is to be added
+ * @hdl: the profile handle indicating the profile to add
+ * @rev: true to add entries to the end of the list
+ * @chg: the change list
+ */
+static enum ice_status
+ice_add_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl,
+		     bool rev, struct list_head *chg)
+{
+	/* Masks that ignore flags */
+	u8 vl_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+	u8 dc_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0x00, 0x00, 0x00 };
+	u8 nm_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x00, 0x00, 0x00, 0x00, 0x00 };
+	enum ice_status status = 0;
+	struct ice_prof_map *map;
+	struct ice_vsig_prof *t;
+	struct ice_chs_chg *p;
+	u16 vsig_idx, i;
+
+	/* Error, if this VSIG already has this profile */
+	if (ice_has_prof_vsig(hw, blk, vsig, hdl))
+		return ICE_ERR_ALREADY_EXISTS;
+
+	/* new VSIG profile structure */
+	t = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*t), GFP_KERNEL);
+	if (!t)
+		return ICE_ERR_NO_MEMORY;
+
+	mutex_lock(&hw->blk[blk].es.prof_map_lock);
+	/* Get the details on the profile specified by the handle ID */
+	map = ice_search_prof_id(hw, blk, hdl);
+	if (!map) {
+		status = ICE_ERR_DOES_NOT_EXIST;
+		goto err_ice_add_prof_id_vsig;
+	}
+
+	t->profile_cookie = map->profile_cookie;
+	t->prof_id = map->prof_id;
+	t->tcam_count = map->ptg_cnt;
+
+	/* create TCAM entries */
+	for (i = 0; i < map->ptg_cnt; i++) {
+		u16 tcam_idx;
+
+		/* add TCAM to change list */
+		p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL);
+		if (!p) {
+			status = ICE_ERR_NO_MEMORY;
+			goto err_ice_add_prof_id_vsig;
+		}
+
+		/* allocate the TCAM entry index */
+		status = ice_alloc_tcam_ent(hw, blk, &tcam_idx);
+		if (status) {
+			devm_kfree(ice_hw_to_dev(hw), p);
+			goto err_ice_add_prof_id_vsig;
+		}
+
+		t->tcam[i].ptg = map->ptg[i];
+		t->tcam[i].prof_id = map->prof_id;
+		t->tcam[i].tcam_idx = tcam_idx;
+		t->tcam[i].in_use = true;
+
+		p->type = ICE_TCAM_ADD;
+		p->add_tcam_idx = true;
+		p->prof_id = t->tcam[i].prof_id;
+		p->ptg = t->tcam[i].ptg;
+		p->vsig = vsig;
+		p->tcam_idx = t->tcam[i].tcam_idx;
+
+		/* write the TCAM entry */
+		status = ice_tcam_write_entry(hw, blk, t->tcam[i].tcam_idx,
+					      t->tcam[i].prof_id,
+					      t->tcam[i].ptg, vsig, 0, 0,
+					      vl_msk, dc_msk, nm_msk);
+		if (status) {
+			devm_kfree(ice_hw_to_dev(hw), p);
+			goto err_ice_add_prof_id_vsig;
+		}
+
+		/* log change */
+		list_add(&p->list_entry, chg);
+	}
+
+	/* add profile to VSIG */
+	vsig_idx = vsig & ICE_VSIG_IDX_M;
+	if (rev)
+		list_add_tail(&t->list,
+			      &hw->blk[blk].xlt2.vsig_tbl[vsig_idx].prop_lst);
+	else
+		list_add(&t->list,
+			 &hw->blk[blk].xlt2.vsig_tbl[vsig_idx].prop_lst);
+
+	mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+	return status;
+
+err_ice_add_prof_id_vsig:
+	mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+	/* let caller clean up the change list */
+	devm_kfree(ice_hw_to_dev(hw), t);
+	return status;
+}
+
+/**
+ * ice_create_prof_id_vsig - add a new VSIG with a single profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsi: the initial VSI that will be in VSIG
+ * @hdl: the profile handle of the profile that will be added to the VSIG
+ * @chg: the change list
+ */
+static enum ice_status
+ice_create_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl,
+			struct list_head *chg)
+{
+	enum ice_status status;
+	struct ice_chs_chg *p;
+	u16 new_vsig;
+
+	p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return ICE_ERR_NO_MEMORY;
+
+	new_vsig = ice_vsig_alloc(hw, blk);
+	if (!new_vsig) {
+		status = ICE_ERR_HW_TABLE;
+		goto err_ice_create_prof_id_vsig;
+	}
+
+	status = ice_move_vsi(hw, blk, vsi, new_vsig, chg);
+	if (status)
+		goto err_ice_create_prof_id_vsig;
+
+	status = ice_add_prof_id_vsig(hw, blk, new_vsig, hdl, false, chg);
+	if (status)
+		goto err_ice_create_prof_id_vsig;
+
+	p->type = ICE_VSIG_ADD;
+	p->vsi = vsi;
+	p->orig_vsig = ICE_DEFAULT_VSIG;
+	p->vsig = new_vsig;
+
+	list_add(&p->list_entry, chg);
+
+	return 0;
+
+err_ice_create_prof_id_vsig:
+	/* let caller clean up the change list */
+	devm_kfree(ice_hw_to_dev(hw), p);
+	return status;
+}
+
+/**
+ * ice_create_vsig_from_lst - create a new VSIG with a list of profiles
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsi: the initial VSI that will be in VSIG
+ * @lst: the list of profile that will be added to the VSIG
+ * @new_vsig: return of new VSIG
+ * @chg: the change list
+ */
+static enum ice_status
+ice_create_vsig_from_lst(struct ice_hw *hw, enum ice_block blk, u16 vsi,
+			 struct list_head *lst, u16 *new_vsig,
+			 struct list_head *chg)
+{
+	struct ice_vsig_prof *t;
+	enum ice_status status;
+	u16 vsig;
+
+	vsig = ice_vsig_alloc(hw, blk);
+	if (!vsig)
+		return ICE_ERR_HW_TABLE;
+
+	status = ice_move_vsi(hw, blk, vsi, vsig, chg);
+	if (status)
+		return status;
+
+	list_for_each_entry(t, lst, list) {
+		/* Reverse the order here since we are copying the list */
+		status = ice_add_prof_id_vsig(hw, blk, vsig, t->profile_cookie,
+					      true, chg);
+		if (status)
+			return status;
+	}
+
+	*new_vsig = vsig;
+
+	return 0;
+}
+
+/**
+ * ice_find_prof_vsig - find a VSIG with a specific profile handle
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @hdl: the profile handle of the profile to search for
+ * @vsig: returns the VSIG with the matching profile
+ */
+static bool
+ice_find_prof_vsig(struct ice_hw *hw, enum ice_block blk, u64 hdl, u16 *vsig)
+{
+	struct ice_vsig_prof *t;
+	enum ice_status status;
+	struct list_head lst;
+
+	INIT_LIST_HEAD(&lst);
+
+	t = kzalloc(sizeof(*t), GFP_KERNEL);
+	if (!t)
+		return false;
+
+	t->profile_cookie = hdl;
+	list_add(&t->list, &lst);
+
+	status = ice_find_dup_props_vsig(hw, blk, &lst, vsig);
+
+	list_del(&t->list);
+	kfree(t);
+
+	return !status;
+}
+
+/**
+ * ice_add_prof_id_flow - add profile flow
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsi: the VSI to enable with the profile specified by ID
+ * @hdl: profile handle
+ *
+ * Calling this function will update the hardware tables to enable the
+ * profile indicated by the ID parameter for the VSIs specified in the VSI
+ * array. Once successfully called, the flow will be enabled.
+ */
+enum ice_status
+ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl)
+{
+	struct ice_vsig_prof *tmp1, *del1;
+	struct ice_chs_chg *tmp, *del;
+	struct list_head union_lst;
+	enum ice_status status;
+	struct list_head chg;
+	u16 vsig;
+
+	INIT_LIST_HEAD(&union_lst);
+	INIT_LIST_HEAD(&chg);
+
+	/* Get profile */
+	status = ice_get_prof(hw, blk, hdl, &chg);
+	if (status)
+		return status;
+
+	/* determine if VSI is already part of a VSIG */
+	status = ice_vsig_find_vsi(hw, blk, vsi, &vsig);
+	if (!status && vsig) {
+		bool only_vsi;
+		u16 or_vsig;
+		u16 ref;
+
+		/* found in VSIG */
+		or_vsig = vsig;
+
+		/* make sure that there is no overlap/conflict between the new
+		 * characteristics and the existing ones; we don't support that
+		 * scenario
+		 */
+		if (ice_has_prof_vsig(hw, blk, vsig, hdl)) {
+			status = ICE_ERR_ALREADY_EXISTS;
+			goto err_ice_add_prof_id_flow;
+		}
+
+		/* last VSI in the VSIG? */
+		status = ice_vsig_get_ref(hw, blk, vsig, &ref);
+		if (status)
+			goto err_ice_add_prof_id_flow;
+		only_vsi = (ref == 1);
+
+		/* create a union of the current profiles and the one being
+		 * added
+		 */
+		status = ice_get_profs_vsig(hw, blk, vsig, &union_lst);
+		if (status)
+			goto err_ice_add_prof_id_flow;
+
+		status = ice_add_prof_to_lst(hw, blk, &union_lst, hdl);
+		if (status)
+			goto err_ice_add_prof_id_flow;
+
+		/* search for an existing VSIG with an exact charc match */
+		status = ice_find_dup_props_vsig(hw, blk, &union_lst, &vsig);
+		if (!status) {
+			/* move VSI to the VSIG that matches */
+			status = ice_move_vsi(hw, blk, vsi, vsig, &chg);
+			if (status)
+				goto err_ice_add_prof_id_flow;
+
+			/* VSI has been moved out of or_vsig. If the or_vsig had
+			 * only that VSI it is now empty and can be removed.
+			 */
+			if (only_vsi) {
+				status = ice_rem_vsig(hw, blk, or_vsig, &chg);
+				if (status)
+					goto err_ice_add_prof_id_flow;
+			}
+		} else if (only_vsi) {
+			/* If the original VSIG only contains one VSI, then it
+			 * will be the requesting VSI. In this case the VSI is
+			 * not sharing entries and we can simply add the new
+			 * profile to the VSIG.
+			 */
+			status = ice_add_prof_id_vsig(hw, blk, vsig, hdl, false,
+						      &chg);
+			if (status)
+				goto err_ice_add_prof_id_flow;
+
+			/* Adjust priorities */
+			status = ice_adj_prof_priorities(hw, blk, vsig, &chg);
+			if (status)
+				goto err_ice_add_prof_id_flow;
+		} else {
+			/* No match, so we need a new VSIG */
+			status = ice_create_vsig_from_lst(hw, blk, vsi,
+							  &union_lst, &vsig,
+							  &chg);
+			if (status)
+				goto err_ice_add_prof_id_flow;
+
+			/* Adjust priorities */
+			status = ice_adj_prof_priorities(hw, blk, vsig, &chg);
+			if (status)
+				goto err_ice_add_prof_id_flow;
+		}
+	} else {
+		/* need to find or add a VSIG */
+		/* search for an existing VSIG with an exact charc match */
+		if (ice_find_prof_vsig(hw, blk, hdl, &vsig)) {
+			/* found an exact match */
+			/* add or move VSI to the VSIG that matches */
+			status = ice_move_vsi(hw, blk, vsi, vsig, &chg);
+			if (status)
+				goto err_ice_add_prof_id_flow;
+		} else {
+			/* we did not find an exact match */
+			/* we need to add a VSIG */
+			status = ice_create_prof_id_vsig(hw, blk, vsi, hdl,
+							 &chg);
+			if (status)
+				goto err_ice_add_prof_id_flow;
+		}
+	}
+
+	/* update hardware */
+	if (!status)
+		status = ice_upd_prof_hw(hw, blk, &chg);
+
+err_ice_add_prof_id_flow:
+	list_for_each_entry_safe(del, tmp, &chg, list_entry) {
+		list_del(&del->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), del);
+	}
+
+	list_for_each_entry_safe(del1, tmp1, &union_lst, list) {
+		list_del(&del1->list);
+		devm_kfree(ice_hw_to_dev(hw), del1);
+	}
+
+	return status;
+}
+
+/**
+ * ice_rem_prof_from_list - remove a profile from list
+ * @hw: pointer to the HW struct
+ * @lst: list to remove the profile from
+ * @hdl: the profile handle indicating the profile to remove
+ */
+static enum ice_status
+ice_rem_prof_from_list(struct ice_hw *hw, struct list_head *lst, u64 hdl)
+{
+	struct ice_vsig_prof *ent, *tmp;
+
+	list_for_each_entry_safe(ent, tmp, lst, list)
+		if (ent->profile_cookie == hdl) {
+			list_del(&ent->list);
+			devm_kfree(ice_hw_to_dev(hw), ent);
+			return 0;
+		}
+
+	return ICE_ERR_DOES_NOT_EXIST;
+}
+
+/**
+ * ice_rem_prof_id_flow - remove flow
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsi: the VSI from which to remove the profile specified by ID
+ * @hdl: profile tracking handle
+ *
+ * Calling this function will update the hardware tables to remove the
+ * profile indicated by the ID parameter for the VSIs specified in the VSI
+ * array. Once successfully called, the flow will be disabled.
+ */
+enum ice_status
+ice_rem_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl)
+{
+	struct ice_vsig_prof *tmp1, *del1;
+	struct ice_chs_chg *tmp, *del;
+	struct list_head chg, copy;
+	enum ice_status status;
+	u16 vsig;
+
+	INIT_LIST_HEAD(&copy);
+	INIT_LIST_HEAD(&chg);
+
+	/* determine if VSI is already part of a VSIG */
+	status = ice_vsig_find_vsi(hw, blk, vsi, &vsig);
+	if (!status && vsig) {
+		bool last_profile;
+		bool only_vsi;
+		u16 ref;
+
+		/* found in VSIG */
+		last_profile = ice_vsig_prof_id_count(hw, blk, vsig) == 1;
+		status = ice_vsig_get_ref(hw, blk, vsig, &ref);
+		if (status)
+			goto err_ice_rem_prof_id_flow;
+		only_vsi = (ref == 1);
+
+		if (only_vsi) {
+			/* If the original VSIG only contains one reference,
+			 * which will be the requesting VSI, then the VSI is not
+			 * sharing entries and we can simply remove the specific
+			 * characteristics from the VSIG.
+			 */
+
+			if (last_profile) {
+				/* If there are no profiles left for this VSIG,
+				 * then simply remove the VSIG.
+				 */
+				status = ice_rem_vsig(hw, blk, vsig, &chg);
+				if (status)
+					goto err_ice_rem_prof_id_flow;
+			} else {
+				status = ice_rem_prof_id_vsig(hw, blk, vsig,
+							      hdl, &chg);
+				if (status)
+					goto err_ice_rem_prof_id_flow;
+
+				/* Adjust priorities */
+				status = ice_adj_prof_priorities(hw, blk, vsig,
+								 &chg);
+				if (status)
+					goto err_ice_rem_prof_id_flow;
+			}
+
+		} else {
+			/* Make a copy of the VSIG's list of Profiles */
+			status = ice_get_profs_vsig(hw, blk, vsig, &copy);
+			if (status)
+				goto err_ice_rem_prof_id_flow;
+
+			/* Remove specified profile entry from the list */
+			status = ice_rem_prof_from_list(hw, &copy, hdl);
+			if (status)
+				goto err_ice_rem_prof_id_flow;
+
+			if (list_empty(&copy)) {
+				status = ice_move_vsi(hw, blk, vsi,
+						      ICE_DEFAULT_VSIG, &chg);
+				if (status)
+					goto err_ice_rem_prof_id_flow;
+
+			} else if (!ice_find_dup_props_vsig(hw, blk, &copy,
+							    &vsig)) {
+				/* found an exact match */
+				/* add or move VSI to the VSIG that matches */
+				/* Search for a VSIG with a matching profile
+				 * list
+				 */
+
+				/* Found match, move VSI to the matching VSIG */
+				status = ice_move_vsi(hw, blk, vsi, vsig, &chg);
+				if (status)
+					goto err_ice_rem_prof_id_flow;
+			} else {
+				/* since no existing VSIG supports this
+				 * characteristic pattern, we need to create a
+				 * new VSIG and TCAM entries
+				 */
+				status = ice_create_vsig_from_lst(hw, blk, vsi,
+								  &copy, &vsig,
+								  &chg);
+				if (status)
+					goto err_ice_rem_prof_id_flow;
+
+				/* Adjust priorities */
+				status = ice_adj_prof_priorities(hw, blk, vsig,
+								 &chg);
+				if (status)
+					goto err_ice_rem_prof_id_flow;
+			}
+		}
+	} else {
+		status = ICE_ERR_DOES_NOT_EXIST;
+	}
+
+	/* update hardware tables */
+	if (!status)
+		status = ice_upd_prof_hw(hw, blk, &chg);
+
+err_ice_rem_prof_id_flow:
+	list_for_each_entry_safe(del, tmp, &chg, list_entry) {
+		list_del(&del->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), del);
+	}
+
+	list_for_each_entry_safe(del1, tmp1, &copy, list) {
+		list_del(&del1->list);
+		devm_kfree(ice_hw_to_dev(hw), del1);
+	}
+
+	return status;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
new file mode 100644
index 000000000..20deddb80
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_FLEX_PIPE_H_
+#define _ICE_FLEX_PIPE_H_
+
+#include "ice_type.h"
+
+/* Package minimal version supported */
+#define ICE_PKG_SUPP_VER_MAJ	1
+#define ICE_PKG_SUPP_VER_MNR	3
+
+/* Package format version */
+#define ICE_PKG_FMT_VER_MAJ	1
+#define ICE_PKG_FMT_VER_MNR	0
+#define ICE_PKG_FMT_VER_UPD	0
+#define ICE_PKG_FMT_VER_DFT	0
+
+#define ICE_PKG_CNT 4
+
+bool
+ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port);
+int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table,
+			    unsigned int idx, struct udp_tunnel_info *ti);
+int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table,
+			      unsigned int idx, struct udp_tunnel_info *ti);
+
+enum ice_status
+ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
+	     struct ice_fv_word *es);
+enum ice_status
+ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl);
+enum ice_status
+ice_rem_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl);
+enum ice_status ice_init_pkg(struct ice_hw *hw, u8 *buff, u32 len);
+enum ice_status
+ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, u32 len);
+enum ice_status ice_init_hw_tbls(struct ice_hw *hw);
+void ice_free_seg(struct ice_hw *hw);
+void ice_fill_blk_tbls(struct ice_hw *hw);
+void ice_clear_hw_tbls(struct ice_hw *hw);
+void ice_free_hw_tbls(struct ice_hw *hw);
+enum ice_status
+ice_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 id);
+#endif /* _ICE_FLEX_PIPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h
new file mode 100644
index 000000000..24063c135
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h
@@ -0,0 +1,519 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_FLEX_TYPE_H_
+#define _ICE_FLEX_TYPE_H_
+
+#define ICE_FV_OFFSET_INVAL	0x1FF
+
+/* Extraction Sequence (Field Vector) Table */
+struct ice_fv_word {
+	u8 prot_id;
+	u16 off;		/* Offset within the protocol header */
+	u8 resvrd;
+} __packed;
+
+#define ICE_MAX_FV_WORDS 48
+struct ice_fv {
+	struct ice_fv_word ew[ICE_MAX_FV_WORDS];
+};
+
+/* Package and segment headers and tables */
+struct ice_pkg_hdr {
+	struct ice_pkg_ver pkg_format_ver;
+	__le32 seg_count;
+	__le32 seg_offset[];
+};
+
+/* generic segment */
+struct ice_generic_seg_hdr {
+#define SEGMENT_TYPE_METADATA	0x00000001
+#define SEGMENT_TYPE_ICE	0x00000010
+	__le32 seg_type;
+	struct ice_pkg_ver seg_format_ver;
+	__le32 seg_size;
+	char seg_id[ICE_PKG_NAME_SIZE];
+};
+
+/* ice specific segment */
+
+union ice_device_id {
+	struct {
+		__le16 device_id;
+		__le16 vendor_id;
+	} dev_vend_id;
+	__le32 id;
+};
+
+struct ice_device_id_entry {
+	union ice_device_id device;
+	union ice_device_id sub_device;
+};
+
+struct ice_seg {
+	struct ice_generic_seg_hdr hdr;
+	__le32 device_table_count;
+	struct ice_device_id_entry device_table[];
+};
+
+struct ice_nvm_table {
+	__le32 table_count;
+	__le32 vers[];
+};
+
+struct ice_buf {
+#define ICE_PKG_BUF_SIZE	4096
+	u8 buf[ICE_PKG_BUF_SIZE];
+};
+
+struct ice_buf_table {
+	__le32 buf_count;
+	struct ice_buf buf_array[];
+};
+
+/* global metadata specific segment */
+struct ice_global_metadata_seg {
+	struct ice_generic_seg_hdr hdr;
+	struct ice_pkg_ver pkg_ver;
+	__le32 rsvd;
+	char pkg_name[ICE_PKG_NAME_SIZE];
+};
+
+#define ICE_MIN_S_OFF		12
+#define ICE_MAX_S_OFF		4095
+#define ICE_MIN_S_SZ		1
+#define ICE_MAX_S_SZ		4084
+
+/* section information */
+struct ice_section_entry {
+	__le32 type;
+	__le16 offset;
+	__le16 size;
+};
+
+#define ICE_MIN_S_COUNT		1
+#define ICE_MAX_S_COUNT		511
+#define ICE_MIN_S_DATA_END	12
+#define ICE_MAX_S_DATA_END	4096
+
+#define ICE_METADATA_BUF	0x80000000
+
+struct ice_buf_hdr {
+	__le16 section_count;
+	__le16 data_end;
+	struct ice_section_entry section_entry[];
+};
+
+#define ICE_MAX_ENTRIES_IN_BUF(hd_sz, ent_sz) ((ICE_PKG_BUF_SIZE - \
+	struct_size((struct ice_buf_hdr *)0, section_entry, 1) - (hd_sz)) /\
+	(ent_sz))
+
+/* ice package section IDs */
+#define ICE_SID_XLT0_SW			10
+#define ICE_SID_XLT_KEY_BUILDER_SW	11
+#define ICE_SID_XLT1_SW			12
+#define ICE_SID_XLT2_SW			13
+#define ICE_SID_PROFID_TCAM_SW		14
+#define ICE_SID_PROFID_REDIR_SW		15
+#define ICE_SID_FLD_VEC_SW		16
+#define ICE_SID_CDID_KEY_BUILDER_SW	17
+#define ICE_SID_CDID_REDIR_SW		18
+
+#define ICE_SID_XLT0_ACL		20
+#define ICE_SID_XLT_KEY_BUILDER_ACL	21
+#define ICE_SID_XLT1_ACL		22
+#define ICE_SID_XLT2_ACL		23
+#define ICE_SID_PROFID_TCAM_ACL		24
+#define ICE_SID_PROFID_REDIR_ACL	25
+#define ICE_SID_FLD_VEC_ACL		26
+#define ICE_SID_CDID_KEY_BUILDER_ACL	27
+#define ICE_SID_CDID_REDIR_ACL		28
+
+#define ICE_SID_XLT0_FD			30
+#define ICE_SID_XLT_KEY_BUILDER_FD	31
+#define ICE_SID_XLT1_FD			32
+#define ICE_SID_XLT2_FD			33
+#define ICE_SID_PROFID_TCAM_FD		34
+#define ICE_SID_PROFID_REDIR_FD		35
+#define ICE_SID_FLD_VEC_FD		36
+#define ICE_SID_CDID_KEY_BUILDER_FD	37
+#define ICE_SID_CDID_REDIR_FD		38
+
+#define ICE_SID_XLT0_RSS		40
+#define ICE_SID_XLT_KEY_BUILDER_RSS	41
+#define ICE_SID_XLT1_RSS		42
+#define ICE_SID_XLT2_RSS		43
+#define ICE_SID_PROFID_TCAM_RSS		44
+#define ICE_SID_PROFID_REDIR_RSS	45
+#define ICE_SID_FLD_VEC_RSS		46
+#define ICE_SID_CDID_KEY_BUILDER_RSS	47
+#define ICE_SID_CDID_REDIR_RSS		48
+
+#define ICE_SID_RXPARSER_BOOST_TCAM	56
+#define ICE_SID_TXPARSER_BOOST_TCAM	66
+
+#define ICE_SID_XLT0_PE			80
+#define ICE_SID_XLT_KEY_BUILDER_PE	81
+#define ICE_SID_XLT1_PE			82
+#define ICE_SID_XLT2_PE			83
+#define ICE_SID_PROFID_TCAM_PE		84
+#define ICE_SID_PROFID_REDIR_PE		85
+#define ICE_SID_FLD_VEC_PE		86
+#define ICE_SID_CDID_KEY_BUILDER_PE	87
+#define ICE_SID_CDID_REDIR_PE		88
+
+/* Label Metadata section IDs */
+#define ICE_SID_LBL_FIRST		0x80000010
+#define ICE_SID_LBL_RXPARSER_TMEM	0x80000018
+/* The following define MUST be updated to reflect the last label section ID */
+#define ICE_SID_LBL_LAST		0x80000038
+
+enum ice_block {
+	ICE_BLK_SW = 0,
+	ICE_BLK_ACL,
+	ICE_BLK_FD,
+	ICE_BLK_RSS,
+	ICE_BLK_PE,
+	ICE_BLK_COUNT
+};
+
+enum ice_sect {
+	ICE_XLT0 = 0,
+	ICE_XLT_KB,
+	ICE_XLT1,
+	ICE_XLT2,
+	ICE_PROF_TCAM,
+	ICE_PROF_REDIR,
+	ICE_VEC_TBL,
+	ICE_CDID_KB,
+	ICE_CDID_REDIR,
+	ICE_SECT_COUNT
+};
+
+/* package labels */
+struct ice_label {
+	__le16 value;
+#define ICE_PKG_LABEL_SIZE	64
+	char name[ICE_PKG_LABEL_SIZE];
+};
+
+struct ice_label_section {
+	__le16 count;
+	struct ice_label label[];
+};
+
+#define ICE_MAX_LABELS_IN_BUF ICE_MAX_ENTRIES_IN_BUF( \
+	struct_size((struct ice_label_section *)0, label, 1) - \
+	sizeof(struct ice_label), sizeof(struct ice_label))
+
+struct ice_sw_fv_section {
+	__le16 count;
+	__le16 base_offset;
+	struct ice_fv fv[];
+};
+
+/* The BOOST TCAM stores the match packet header in reverse order, meaning
+ * the fields are reversed; in addition, this means that the normally big endian
+ * fields of the packet are now little endian.
+ */
+struct ice_boost_key_value {
+#define ICE_BOOST_REMAINING_HV_KEY	15
+	u8 remaining_hv_key[ICE_BOOST_REMAINING_HV_KEY];
+	__le16 hv_dst_port_key;
+	__le16 hv_src_port_key;
+	u8 tcam_search_key;
+} __packed;
+
+struct ice_boost_key {
+	struct ice_boost_key_value key;
+	struct ice_boost_key_value key2;
+};
+
+/* package Boost TCAM entry */
+struct ice_boost_tcam_entry {
+	__le16 addr;
+	__le16 reserved;
+	/* break up the 40 bytes of key into different fields */
+	struct ice_boost_key key;
+	u8 boost_hit_index_group;
+	/* The following contains bitfields which are not on byte boundaries.
+	 * These fields are currently unused by driver software.
+	 */
+#define ICE_BOOST_BIT_FIELDS		43
+	u8 bit_fields[ICE_BOOST_BIT_FIELDS];
+};
+
+struct ice_boost_tcam_section {
+	__le16 count;
+	__le16 reserved;
+	struct ice_boost_tcam_entry tcam[];
+};
+
+#define ICE_MAX_BST_TCAMS_IN_BUF ICE_MAX_ENTRIES_IN_BUF( \
+	struct_size((struct ice_boost_tcam_section *)0, tcam, 1) - \
+	sizeof(struct ice_boost_tcam_entry), \
+	sizeof(struct ice_boost_tcam_entry))
+
+struct ice_xlt1_section {
+	__le16 count;
+	__le16 offset;
+	u8 value[];
+};
+
+struct ice_xlt2_section {
+	__le16 count;
+	__le16 offset;
+	__le16 value[];
+};
+
+struct ice_prof_redir_section {
+	__le16 count;
+	__le16 offset;
+	u8 redir_value[];
+};
+
+/* package buffer building */
+
+struct ice_buf_build {
+	struct ice_buf buf;
+	u16 reserved_section_table_entries;
+};
+
+struct ice_pkg_enum {
+	struct ice_buf_table *buf_table;
+	u32 buf_idx;
+
+	u32 type;
+	struct ice_buf_hdr *buf;
+	u32 sect_idx;
+	void *sect;
+	u32 sect_type;
+
+	u32 entry_idx;
+	void *(*handler)(u32 sect_type, void *section, u32 index, u32 *offset);
+};
+
+/* Tunnel enabling */
+
+enum ice_tunnel_type {
+	TNL_VXLAN = 0,
+	TNL_GENEVE,
+	__TNL_TYPE_CNT,
+	TNL_LAST = 0xFF,
+	TNL_ALL = 0xFF,
+};
+
+struct ice_tunnel_type_scan {
+	enum ice_tunnel_type type;
+	const char *label_prefix;
+};
+
+struct ice_tunnel_entry {
+	enum ice_tunnel_type type;
+	u16 boost_addr;
+	u16 port;
+	struct ice_boost_tcam_entry *boost_entry;
+	u8 valid;
+};
+
+#define ICE_TUNNEL_MAX_ENTRIES	16
+
+struct ice_tunnel_table {
+	struct ice_tunnel_entry tbl[ICE_TUNNEL_MAX_ENTRIES];
+	u16 count;
+	u16 valid_count[__TNL_TYPE_CNT];
+};
+
+struct ice_pkg_es {
+	__le16 count;
+	__le16 offset;
+	struct ice_fv_word es[];
+};
+
+struct ice_es {
+	u32 sid;
+	u16 count;
+	u16 fvw;
+	u16 *ref_count;
+	struct list_head prof_map;
+	struct ice_fv_word *t;
+	struct mutex prof_map_lock;	/* protect access to profiles list */
+	u8 *written;
+	u8 reverse; /* set to true to reverse FV order */
+};
+
+/* PTYPE Group management */
+
+/* Note: XLT1 table takes 13-bit as input, and results in an 8-bit packet type
+ * group (PTG) ID as output.
+ *
+ * Note: PTG 0 is the default packet type group and it is assumed that all PTYPE
+ * are a part of this group until moved to a new PTG.
+ */
+#define ICE_DEFAULT_PTG	0
+
+struct ice_ptg_entry {
+	struct ice_ptg_ptype *first_ptype;
+	u8 in_use;
+};
+
+struct ice_ptg_ptype {
+	struct ice_ptg_ptype *next_ptype;
+	u8 ptg;
+};
+
+#define ICE_MAX_TCAM_PER_PROFILE	32
+#define ICE_MAX_PTG_PER_PROFILE		32
+
+struct ice_prof_map {
+	struct list_head list;
+	u64 profile_cookie;
+	u64 context;
+	u8 prof_id;
+	u8 ptg_cnt;
+	u8 ptg[ICE_MAX_PTG_PER_PROFILE];
+};
+
+#define ICE_INVALID_TCAM	0xFFFF
+
+struct ice_tcam_inf {
+	u16 tcam_idx;
+	u8 ptg;
+	u8 prof_id;
+	u8 in_use;
+};
+
+struct ice_vsig_prof {
+	struct list_head list;
+	u64 profile_cookie;
+	u8 prof_id;
+	u8 tcam_count;
+	struct ice_tcam_inf tcam[ICE_MAX_TCAM_PER_PROFILE];
+};
+
+struct ice_vsig_entry {
+	struct list_head prop_lst;
+	struct ice_vsig_vsi *first_vsi;
+	u8 in_use;
+};
+
+struct ice_vsig_vsi {
+	struct ice_vsig_vsi *next_vsi;
+	u32 prop_mask;
+	u16 changed;
+	u16 vsig;
+};
+
+#define ICE_XLT1_CNT	1024
+#define ICE_MAX_PTGS	256
+
+/* XLT1 Table */
+struct ice_xlt1 {
+	struct ice_ptg_entry *ptg_tbl;
+	struct ice_ptg_ptype *ptypes;
+	u8 *t;
+	u32 sid;
+	u16 count;
+};
+
+#define ICE_XLT2_CNT	768
+#define ICE_MAX_VSIGS	768
+
+/* VSIG bit layout:
+ * [0:12]: incremental VSIG index 1 to ICE_MAX_VSIGS
+ * [13:15]: PF number of device
+ */
+#define ICE_VSIG_IDX_M	(0x1FFF)
+#define ICE_PF_NUM_S	13
+#define ICE_PF_NUM_M	(0x07 << ICE_PF_NUM_S)
+#define ICE_VSIG_VALUE(vsig, pf_id) \
+	(u16)((((u16)(vsig)) & ICE_VSIG_IDX_M) | \
+	      (((u16)(pf_id) << ICE_PF_NUM_S) & ICE_PF_NUM_M))
+#define ICE_DEFAULT_VSIG	0
+
+/* XLT2 Table */
+struct ice_xlt2 {
+	struct ice_vsig_entry *vsig_tbl;
+	struct ice_vsig_vsi *vsis;
+	u16 *t;
+	u32 sid;
+	u16 count;
+};
+
+/* Profile ID Management */
+struct ice_prof_id_key {
+	__le16 flags;
+	u8 xlt1;
+	__le16 xlt2_cdid;
+} __packed;
+
+/* Keys are made up of two values, each one-half the size of the key.
+ * For TCAM, the entire key is 80 bits wide (or 2, 40-bit wide values)
+ */
+#define ICE_TCAM_KEY_VAL_SZ	5
+#define ICE_TCAM_KEY_SZ		(2 * ICE_TCAM_KEY_VAL_SZ)
+
+struct ice_prof_tcam_entry {
+	__le16 addr;
+	u8 key[ICE_TCAM_KEY_SZ];
+	u8 prof_id;
+} __packed;
+
+struct ice_prof_id_section {
+	__le16 count;
+	struct ice_prof_tcam_entry entry[];
+};
+
+struct ice_prof_tcam {
+	u32 sid;
+	u16 count;
+	u16 max_prof_id;
+	struct ice_prof_tcam_entry *t;
+	u8 cdid_bits; /* # CDID bits to use in key, 0, 2, 4, or 8 */
+};
+
+struct ice_prof_redir {
+	u8 *t;
+	u32 sid;
+	u16 count;
+};
+
+/* Tables per block */
+struct ice_blk_info {
+	struct ice_xlt1 xlt1;
+	struct ice_xlt2 xlt2;
+	struct ice_prof_tcam prof;
+	struct ice_prof_redir prof_redir;
+	struct ice_es es;
+	u8 overwrite; /* set to true to allow overwrite of table entries */
+	u8 is_list_init;
+};
+
+enum ice_chg_type {
+	ICE_TCAM_NONE = 0,
+	ICE_PTG_ES_ADD,
+	ICE_TCAM_ADD,
+	ICE_VSIG_ADD,
+	ICE_VSIG_REM,
+	ICE_VSI_MOVE,
+};
+
+struct ice_chs_chg {
+	struct list_head list_entry;
+	enum ice_chg_type type;
+
+	u8 add_ptg;
+	u8 add_vsig;
+	u8 add_tcam_idx;
+	u8 add_prof;
+	u16 ptype;
+	u8 ptg;
+	u8 prof_id;
+	u16 vsi;
+	u16 vsig;
+	u16 orig_vsig;
+	u16 tcam_idx;
+};
+
+#define ICE_FLOW_PTYPE_MAX		ICE_XLT1_CNT
+#endif /* _ICE_FLEX_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c
new file mode 100644
index 000000000..eadc85aee
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flow.c
@@ -0,0 +1,1677 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_flow.h"
+
+/* Describe properties of a protocol header field */
+struct ice_flow_field_info {
+	enum ice_flow_seg_hdr hdr;
+	s16 off;	/* Offset from start of a protocol header, in bits */
+	u16 size;	/* Size of fields in bits */
+};
+
+#define ICE_FLOW_FLD_INFO(_hdr, _offset_bytes, _size_bytes) { \
+	.hdr = _hdr, \
+	.off = (_offset_bytes) * BITS_PER_BYTE, \
+	.size = (_size_bytes) * BITS_PER_BYTE, \
+}
+
+/* Table containing properties of supported protocol header fields */
+static const
+struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = {
+	/* IPv4 / IPv6 */
+	/* ICE_FLOW_FIELD_IDX_IPV4_SA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV4, 12, sizeof(struct in_addr)),
+	/* ICE_FLOW_FIELD_IDX_IPV4_DA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV4, 16, sizeof(struct in_addr)),
+	/* ICE_FLOW_FIELD_IDX_IPV6_SA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 8, sizeof(struct in6_addr)),
+	/* ICE_FLOW_FIELD_IDX_IPV6_DA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 24, sizeof(struct in6_addr)),
+	/* Transport */
+	/* ICE_FLOW_FIELD_IDX_TCP_SRC_PORT */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 0, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_TCP_DST_PORT */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 2, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_UDP_SRC_PORT */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_UDP, 0, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_UDP_DST_PORT */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_UDP, 2, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 0, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_SCTP_DST_PORT */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 2, sizeof(__be16)),
+	/* GRE */
+	/* ICE_FLOW_FIELD_IDX_GRE_KEYID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GRE, 12,
+			  sizeof_field(struct gre_full_hdr, key)),
+};
+
+/* Bitmaps indicating relevant packet types for a particular protocol header
+ *
+ * Packet types for packets with an Outer/First/Single IPv4 header
+ */
+static const u32 ice_ptypes_ipv4_ofos[] = {
+	0x1DC00000, 0x04000800, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last IPv4 header */
+static const u32 ice_ptypes_ipv4_il[] = {
+	0xE0000000, 0xB807700E, 0x80000003, 0xE01DC03B,
+	0x0000000E, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv6 header */
+static const u32 ice_ptypes_ipv6_ofos[] = {
+	0x00000000, 0x00000000, 0x77000000, 0x10002000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last IPv6 header */
+static const u32 ice_ptypes_ipv6_il[] = {
+	0x00000000, 0x03B80770, 0x000001DC, 0x0EE00000,
+	0x00000770, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv4 header - no L4 */
+static const u32 ice_ipv4_ofos_no_l4[] = {
+	0x10C00000, 0x04000800, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last IPv4 header - no L4 */
+static const u32 ice_ipv4_il_no_l4[] = {
+	0x60000000, 0x18043008, 0x80000002, 0x6010c021,
+	0x00000008, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv6 header - no L4 */
+static const u32 ice_ipv6_ofos_no_l4[] = {
+	0x00000000, 0x00000000, 0x43000000, 0x10002000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last IPv6 header - no L4 */
+static const u32 ice_ipv6_il_no_l4[] = {
+	0x00000000, 0x02180430, 0x0000010c, 0x086010c0,
+	0x00000430, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* UDP Packet types for non-tunneled packets or tunneled
+ * packets with inner UDP.
+ */
+static const u32 ice_ptypes_udp_il[] = {
+	0x81000000, 0x20204040, 0x04000010, 0x80810102,
+	0x00000040, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last TCP header */
+static const u32 ice_ptypes_tcp_il[] = {
+	0x04000000, 0x80810102, 0x10000040, 0x02040408,
+	0x00000102, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last SCTP header */
+static const u32 ice_ptypes_sctp_il[] = {
+	0x08000000, 0x01020204, 0x20000081, 0x04080810,
+	0x00000204, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outermost/First GRE header */
+static const u32 ice_ptypes_gre_of[] = {
+	0x00000000, 0xBFBF7800, 0x000001DF, 0xFEFDE000,
+	0x0000017E, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Manage parameters and info. used during the creation of a flow profile */
+struct ice_flow_prof_params {
+	enum ice_block blk;
+	u16 entry_length; /* # of bytes formatted entry will require */
+	u8 es_cnt;
+	struct ice_flow_prof *prof;
+
+	/* For ACL, the es[0] will have the data of ICE_RX_MDID_PKT_FLAGS_15_0
+	 * This will give us the direction flags.
+	 */
+	struct ice_fv_word es[ICE_MAX_FV_WORDS];
+	DECLARE_BITMAP(ptypes, ICE_FLOW_PTYPE_MAX);
+};
+
+#define ICE_FLOW_SEG_HDRS_L3_MASK	\
+	(ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6)
+#define ICE_FLOW_SEG_HDRS_L4_MASK	\
+	(ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_SCTP)
+
+/**
+ * ice_flow_val_hdrs - validates packet segments for valid protocol headers
+ * @segs: array of one or more packet segments that describe the flow
+ * @segs_cnt: number of packet segments provided
+ */
+static enum ice_status
+ice_flow_val_hdrs(struct ice_flow_seg_info *segs, u8 segs_cnt)
+{
+	u8 i;
+
+	for (i = 0; i < segs_cnt; i++) {
+		/* Multiple L3 headers */
+		if (segs[i].hdrs & ICE_FLOW_SEG_HDRS_L3_MASK &&
+		    !is_power_of_2(segs[i].hdrs & ICE_FLOW_SEG_HDRS_L3_MASK))
+			return ICE_ERR_PARAM;
+
+		/* Multiple L4 headers */
+		if (segs[i].hdrs & ICE_FLOW_SEG_HDRS_L4_MASK &&
+		    !is_power_of_2(segs[i].hdrs & ICE_FLOW_SEG_HDRS_L4_MASK))
+			return ICE_ERR_PARAM;
+	}
+
+	return 0;
+}
+
+/* Sizes of fixed known protocol headers without header options */
+#define ICE_FLOW_PROT_HDR_SZ_MAC	14
+#define ICE_FLOW_PROT_HDR_SZ_IPV4	20
+#define ICE_FLOW_PROT_HDR_SZ_IPV6	40
+#define ICE_FLOW_PROT_HDR_SZ_TCP	20
+#define ICE_FLOW_PROT_HDR_SZ_UDP	8
+#define ICE_FLOW_PROT_HDR_SZ_SCTP	12
+
+/**
+ * ice_flow_calc_seg_sz - calculates size of a packet segment based on headers
+ * @params: information about the flow to be processed
+ * @seg: index of packet segment whose header size is to be determined
+ */
+static u16 ice_flow_calc_seg_sz(struct ice_flow_prof_params *params, u8 seg)
+{
+	u16 sz = ICE_FLOW_PROT_HDR_SZ_MAC;
+
+	/* L3 headers */
+	if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV4)
+		sz += ICE_FLOW_PROT_HDR_SZ_IPV4;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV6)
+		sz += ICE_FLOW_PROT_HDR_SZ_IPV6;
+
+	/* L4 headers */
+	if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_TCP)
+		sz += ICE_FLOW_PROT_HDR_SZ_TCP;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_UDP)
+		sz += ICE_FLOW_PROT_HDR_SZ_UDP;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_SCTP)
+		sz += ICE_FLOW_PROT_HDR_SZ_SCTP;
+
+	return sz;
+}
+
+/**
+ * ice_flow_proc_seg_hdrs - process protocol headers present in pkt segments
+ * @params: information about the flow to be processed
+ *
+ * This function identifies the packet types associated with the protocol
+ * headers being present in packet segments of the specified flow profile.
+ */
+static enum ice_status
+ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
+{
+	struct ice_flow_prof *prof;
+	u8 i;
+
+	memset(params->ptypes, 0xff, sizeof(params->ptypes));
+
+	prof = params->prof;
+
+	for (i = 0; i < params->prof->segs_cnt; i++) {
+		const unsigned long *src;
+		u32 hdrs;
+
+		hdrs = prof->segs[i].hdrs;
+
+		if ((hdrs & ICE_FLOW_SEG_HDR_IPV4) &&
+		    !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK)) {
+			src = !i ? (const unsigned long *)ice_ipv4_ofos_no_l4 :
+				(const unsigned long *)ice_ipv4_il_no_l4;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_IPV4) {
+			src = !i ? (const unsigned long *)ice_ptypes_ipv4_ofos :
+				(const unsigned long *)ice_ptypes_ipv4_il;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if ((hdrs & ICE_FLOW_SEG_HDR_IPV6) &&
+			   !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK)) {
+			src = !i ? (const unsigned long *)ice_ipv6_ofos_no_l4 :
+				(const unsigned long *)ice_ipv6_il_no_l4;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_IPV6) {
+			src = !i ? (const unsigned long *)ice_ptypes_ipv6_ofos :
+				(const unsigned long *)ice_ptypes_ipv6_il;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		}
+
+		if (hdrs & ICE_FLOW_SEG_HDR_UDP) {
+			src = (const unsigned long *)ice_ptypes_udp_il;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_TCP) {
+			bitmap_and(params->ptypes, params->ptypes,
+				   (const unsigned long *)ice_ptypes_tcp_il,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_SCTP) {
+			src = (const unsigned long *)ice_ptypes_sctp_il;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GRE) {
+			if (!i) {
+				src = (const unsigned long *)ice_ptypes_gre_of;
+				bitmap_and(params->ptypes, params->ptypes,
+					   src, ICE_FLOW_PTYPE_MAX);
+			}
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_flow_xtract_fld - Create an extraction sequence entry for the given field
+ * @hw: pointer to the HW struct
+ * @params: information about the flow to be processed
+ * @seg: packet segment index of the field to be extracted
+ * @fld: ID of field to be extracted
+ *
+ * This function determines the protocol ID, offset, and size of the given
+ * field. It then allocates one or more extraction sequence entries for the
+ * given field, and fill the entries with protocol ID and offset information.
+ */
+static enum ice_status
+ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
+		    u8 seg, enum ice_flow_field fld)
+{
+	enum ice_prot_id prot_id = ICE_PROT_ID_INVAL;
+	u8 fv_words = hw->blk[params->blk].es.fvw;
+	struct ice_flow_fld_info *flds;
+	u16 cnt, ese_bits, i;
+	u16 off;
+
+	flds = params->prof->segs[seg].fields;
+
+	switch (fld) {
+	case ICE_FLOW_FIELD_IDX_IPV4_SA:
+	case ICE_FLOW_FIELD_IDX_IPV4_DA:
+		prot_id = seg == 0 ? ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_IPV6_SA:
+	case ICE_FLOW_FIELD_IDX_IPV6_DA:
+		prot_id = seg == 0 ? ICE_PROT_IPV6_OF_OR_S : ICE_PROT_IPV6_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_TCP_SRC_PORT:
+	case ICE_FLOW_FIELD_IDX_TCP_DST_PORT:
+		prot_id = ICE_PROT_TCP_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_UDP_SRC_PORT:
+	case ICE_FLOW_FIELD_IDX_UDP_DST_PORT:
+		prot_id = ICE_PROT_UDP_IL_OR_S;
+		break;
+	case ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT:
+	case ICE_FLOW_FIELD_IDX_SCTP_DST_PORT:
+		prot_id = ICE_PROT_SCTP_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_GRE_KEYID:
+		prot_id = ICE_PROT_GRE_OF;
+		break;
+	default:
+		return ICE_ERR_NOT_IMPL;
+	}
+
+	/* Each extraction sequence entry is a word in size, and extracts a
+	 * word-aligned offset from a protocol header.
+	 */
+	ese_bits = ICE_FLOW_FV_EXTRACT_SZ * BITS_PER_BYTE;
+
+	flds[fld].xtrct.prot_id = prot_id;
+	flds[fld].xtrct.off = (ice_flds_info[fld].off / ese_bits) *
+		ICE_FLOW_FV_EXTRACT_SZ;
+	flds[fld].xtrct.disp = (u8)(ice_flds_info[fld].off % ese_bits);
+	flds[fld].xtrct.idx = params->es_cnt;
+
+	/* Adjust the next field-entry index after accommodating the number of
+	 * entries this field consumes
+	 */
+	cnt = DIV_ROUND_UP(flds[fld].xtrct.disp + ice_flds_info[fld].size,
+			   ese_bits);
+
+	/* Fill in the extraction sequence entries needed for this field */
+	off = flds[fld].xtrct.off;
+	for (i = 0; i < cnt; i++) {
+		u8 idx;
+
+		/* Make sure the number of extraction sequence required
+		 * does not exceed the block's capability
+		 */
+		if (params->es_cnt >= fv_words)
+			return ICE_ERR_MAX_LIMIT;
+
+		/* some blocks require a reversed field vector layout */
+		if (hw->blk[params->blk].es.reverse)
+			idx = fv_words - params->es_cnt - 1;
+		else
+			idx = params->es_cnt;
+
+		params->es[idx].prot_id = prot_id;
+		params->es[idx].off = off;
+		params->es_cnt++;
+
+		off += ICE_FLOW_FV_EXTRACT_SZ;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_flow_xtract_raws - Create extract sequence entries for raw bytes
+ * @hw: pointer to the HW struct
+ * @params: information about the flow to be processed
+ * @seg: index of packet segment whose raw fields are to be extracted
+ */
+static enum ice_status
+ice_flow_xtract_raws(struct ice_hw *hw, struct ice_flow_prof_params *params,
+		     u8 seg)
+{
+	u16 fv_words;
+	u16 hdrs_sz;
+	u8 i;
+
+	if (!params->prof->segs[seg].raws_cnt)
+		return 0;
+
+	if (params->prof->segs[seg].raws_cnt >
+	    ARRAY_SIZE(params->prof->segs[seg].raws))
+		return ICE_ERR_MAX_LIMIT;
+
+	/* Offsets within the segment headers are not supported */
+	hdrs_sz = ice_flow_calc_seg_sz(params, seg);
+	if (!hdrs_sz)
+		return ICE_ERR_PARAM;
+
+	fv_words = hw->blk[params->blk].es.fvw;
+
+	for (i = 0; i < params->prof->segs[seg].raws_cnt; i++) {
+		struct ice_flow_seg_fld_raw *raw;
+		u16 off, cnt, j;
+
+		raw = &params->prof->segs[seg].raws[i];
+
+		/* Storing extraction information */
+		raw->info.xtrct.prot_id = ICE_PROT_MAC_OF_OR_S;
+		raw->info.xtrct.off = (raw->off / ICE_FLOW_FV_EXTRACT_SZ) *
+			ICE_FLOW_FV_EXTRACT_SZ;
+		raw->info.xtrct.disp = (raw->off % ICE_FLOW_FV_EXTRACT_SZ) *
+			BITS_PER_BYTE;
+		raw->info.xtrct.idx = params->es_cnt;
+
+		/* Determine the number of field vector entries this raw field
+		 * consumes.
+		 */
+		cnt = DIV_ROUND_UP(raw->info.xtrct.disp +
+				   (raw->info.src.last * BITS_PER_BYTE),
+				   (ICE_FLOW_FV_EXTRACT_SZ * BITS_PER_BYTE));
+		off = raw->info.xtrct.off;
+		for (j = 0; j < cnt; j++) {
+			u16 idx;
+
+			/* Make sure the number of extraction sequence required
+			 * does not exceed the block's capability
+			 */
+			if (params->es_cnt >= hw->blk[params->blk].es.count ||
+			    params->es_cnt >= ICE_MAX_FV_WORDS)
+				return ICE_ERR_MAX_LIMIT;
+
+			/* some blocks require a reversed field vector layout */
+			if (hw->blk[params->blk].es.reverse)
+				idx = fv_words - params->es_cnt - 1;
+			else
+				idx = params->es_cnt;
+
+			params->es[idx].prot_id = raw->info.xtrct.prot_id;
+			params->es[idx].off = off;
+			params->es_cnt++;
+			off += ICE_FLOW_FV_EXTRACT_SZ;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_flow_create_xtrct_seq - Create an extraction sequence for given segments
+ * @hw: pointer to the HW struct
+ * @params: information about the flow to be processed
+ *
+ * This function iterates through all matched fields in the given segments, and
+ * creates an extraction sequence for the fields.
+ */
+static enum ice_status
+ice_flow_create_xtrct_seq(struct ice_hw *hw,
+			  struct ice_flow_prof_params *params)
+{
+	struct ice_flow_prof *prof = params->prof;
+	enum ice_status status = 0;
+	u8 i;
+
+	for (i = 0; i < prof->segs_cnt; i++) {
+		u8 j;
+
+		for_each_set_bit(j, (unsigned long *)&prof->segs[i].match,
+				 ICE_FLOW_FIELD_IDX_MAX) {
+			status = ice_flow_xtract_fld(hw, params, i,
+						     (enum ice_flow_field)j);
+			if (status)
+				return status;
+		}
+
+		/* Process raw matching bytes */
+		status = ice_flow_xtract_raws(hw, params, i);
+		if (status)
+			return status;
+	}
+
+	return status;
+}
+
+/**
+ * ice_flow_proc_segs - process all packet segments associated with a profile
+ * @hw: pointer to the HW struct
+ * @params: information about the flow to be processed
+ */
+static enum ice_status
+ice_flow_proc_segs(struct ice_hw *hw, struct ice_flow_prof_params *params)
+{
+	enum ice_status status;
+
+	status = ice_flow_proc_seg_hdrs(params);
+	if (status)
+		return status;
+
+	status = ice_flow_create_xtrct_seq(hw, params);
+	if (status)
+		return status;
+
+	switch (params->blk) {
+	case ICE_BLK_FD:
+	case ICE_BLK_RSS:
+		status = 0;
+		break;
+	default:
+		return ICE_ERR_NOT_IMPL;
+	}
+
+	return status;
+}
+
+#define ICE_FLOW_FIND_PROF_CHK_FLDS	0x00000001
+#define ICE_FLOW_FIND_PROF_CHK_VSI	0x00000002
+#define ICE_FLOW_FIND_PROF_NOT_CHK_DIR	0x00000004
+
+/**
+ * ice_flow_find_prof_conds - Find a profile matching headers and conditions
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @dir: flow direction
+ * @segs: array of one or more packet segments that describe the flow
+ * @segs_cnt: number of packet segments provided
+ * @vsi_handle: software VSI handle to check VSI (ICE_FLOW_FIND_PROF_CHK_VSI)
+ * @conds: additional conditions to be checked (ICE_FLOW_FIND_PROF_CHK_*)
+ */
+static struct ice_flow_prof *
+ice_flow_find_prof_conds(struct ice_hw *hw, enum ice_block blk,
+			 enum ice_flow_dir dir, struct ice_flow_seg_info *segs,
+			 u8 segs_cnt, u16 vsi_handle, u32 conds)
+{
+	struct ice_flow_prof *p, *prof = NULL;
+
+	mutex_lock(&hw->fl_profs_locks[blk]);
+	list_for_each_entry(p, &hw->fl_profs[blk], l_entry)
+		if ((p->dir == dir || conds & ICE_FLOW_FIND_PROF_NOT_CHK_DIR) &&
+		    segs_cnt && segs_cnt == p->segs_cnt) {
+			u8 i;
+
+			/* Check for profile-VSI association if specified */
+			if ((conds & ICE_FLOW_FIND_PROF_CHK_VSI) &&
+			    ice_is_vsi_valid(hw, vsi_handle) &&
+			    !test_bit(vsi_handle, p->vsis))
+				continue;
+
+			/* Protocol headers must be checked. Matched fields are
+			 * checked if specified.
+			 */
+			for (i = 0; i < segs_cnt; i++)
+				if (segs[i].hdrs != p->segs[i].hdrs ||
+				    ((conds & ICE_FLOW_FIND_PROF_CHK_FLDS) &&
+				     segs[i].match != p->segs[i].match))
+					break;
+
+			/* A match is found if all segments are matched */
+			if (i == segs_cnt) {
+				prof = p;
+				break;
+			}
+		}
+	mutex_unlock(&hw->fl_profs_locks[blk]);
+
+	return prof;
+}
+
+/**
+ * ice_flow_find_prof_id - Look up a profile with given profile ID
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @prof_id: unique ID to identify this flow profile
+ */
+static struct ice_flow_prof *
+ice_flow_find_prof_id(struct ice_hw *hw, enum ice_block blk, u64 prof_id)
+{
+	struct ice_flow_prof *p;
+
+	list_for_each_entry(p, &hw->fl_profs[blk], l_entry)
+		if (p->id == prof_id)
+			return p;
+
+	return NULL;
+}
+
+/**
+ * ice_dealloc_flow_entry - Deallocate flow entry memory
+ * @hw: pointer to the HW struct
+ * @entry: flow entry to be removed
+ */
+static void
+ice_dealloc_flow_entry(struct ice_hw *hw, struct ice_flow_entry *entry)
+{
+	if (!entry)
+		return;
+
+	if (entry->entry)
+		devm_kfree(ice_hw_to_dev(hw), entry->entry);
+
+	devm_kfree(ice_hw_to_dev(hw), entry);
+}
+
+/**
+ * ice_flow_rem_entry_sync - Remove a flow entry
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @entry: flow entry to be removed
+ */
+static enum ice_status
+ice_flow_rem_entry_sync(struct ice_hw *hw, enum ice_block __always_unused blk,
+			struct ice_flow_entry *entry)
+{
+	if (!entry)
+		return ICE_ERR_BAD_PTR;
+
+	list_del(&entry->l_entry);
+
+	ice_dealloc_flow_entry(hw, entry);
+
+	return 0;
+}
+
+/**
+ * ice_flow_add_prof_sync - Add a flow profile for packet segments and fields
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @dir: flow direction
+ * @prof_id: unique ID to identify this flow profile
+ * @segs: array of one or more packet segments that describe the flow
+ * @segs_cnt: number of packet segments provided
+ * @prof: stores the returned flow profile added
+ *
+ * Assumption: the caller has acquired the lock to the profile list
+ */
+static enum ice_status
+ice_flow_add_prof_sync(struct ice_hw *hw, enum ice_block blk,
+		       enum ice_flow_dir dir, u64 prof_id,
+		       struct ice_flow_seg_info *segs, u8 segs_cnt,
+		       struct ice_flow_prof **prof)
+{
+	struct ice_flow_prof_params params;
+	enum ice_status status;
+	u8 i;
+
+	if (!prof)
+		return ICE_ERR_BAD_PTR;
+
+	memset(&params, 0, sizeof(params));
+	params.prof = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*params.prof),
+				   GFP_KERNEL);
+	if (!params.prof)
+		return ICE_ERR_NO_MEMORY;
+
+	/* initialize extraction sequence to all invalid (0xff) */
+	for (i = 0; i < ICE_MAX_FV_WORDS; i++) {
+		params.es[i].prot_id = ICE_PROT_INVALID;
+		params.es[i].off = ICE_FV_OFFSET_INVAL;
+	}
+
+	params.blk = blk;
+	params.prof->id = prof_id;
+	params.prof->dir = dir;
+	params.prof->segs_cnt = segs_cnt;
+
+	/* Make a copy of the segments that need to be persistent in the flow
+	 * profile instance
+	 */
+	for (i = 0; i < segs_cnt; i++)
+		memcpy(&params.prof->segs[i], &segs[i], sizeof(*segs));
+
+	status = ice_flow_proc_segs(hw, &params);
+	if (status) {
+		ice_debug(hw, ICE_DBG_FLOW,
+			  "Error processing a flow's packet segments\n");
+		goto out;
+	}
+
+	/* Add a HW profile for this flow profile */
+	status = ice_add_prof(hw, blk, prof_id, (u8 *)params.ptypes, params.es);
+	if (status) {
+		ice_debug(hw, ICE_DBG_FLOW, "Error adding a HW flow profile\n");
+		goto out;
+	}
+
+	INIT_LIST_HEAD(&params.prof->entries);
+	mutex_init(&params.prof->entries_lock);
+	*prof = params.prof;
+
+out:
+	if (status)
+		devm_kfree(ice_hw_to_dev(hw), params.prof);
+
+	return status;
+}
+
+/**
+ * ice_flow_rem_prof_sync - remove a flow profile
+ * @hw: pointer to the hardware structure
+ * @blk: classification stage
+ * @prof: pointer to flow profile to remove
+ *
+ * Assumption: the caller has acquired the lock to the profile list
+ */
+static enum ice_status
+ice_flow_rem_prof_sync(struct ice_hw *hw, enum ice_block blk,
+		       struct ice_flow_prof *prof)
+{
+	enum ice_status status;
+
+	/* Remove all remaining flow entries before removing the flow profile */
+	if (!list_empty(&prof->entries)) {
+		struct ice_flow_entry *e, *t;
+
+		mutex_lock(&prof->entries_lock);
+
+		list_for_each_entry_safe(e, t, &prof->entries, l_entry) {
+			status = ice_flow_rem_entry_sync(hw, blk, e);
+			if (status)
+				break;
+		}
+
+		mutex_unlock(&prof->entries_lock);
+	}
+
+	/* Remove all hardware profiles associated with this flow profile */
+	status = ice_rem_prof(hw, blk, prof->id);
+	if (!status) {
+		list_del(&prof->l_entry);
+		mutex_destroy(&prof->entries_lock);
+		devm_kfree(ice_hw_to_dev(hw), prof);
+	}
+
+	return status;
+}
+
+/**
+ * ice_flow_assoc_prof - associate a VSI with a flow profile
+ * @hw: pointer to the hardware structure
+ * @blk: classification stage
+ * @prof: pointer to flow profile
+ * @vsi_handle: software VSI handle
+ *
+ * Assumption: the caller has acquired the lock to the profile list
+ * and the software VSI handle has been validated
+ */
+static enum ice_status
+ice_flow_assoc_prof(struct ice_hw *hw, enum ice_block blk,
+		    struct ice_flow_prof *prof, u16 vsi_handle)
+{
+	enum ice_status status = 0;
+
+	if (!test_bit(vsi_handle, prof->vsis)) {
+		status = ice_add_prof_id_flow(hw, blk,
+					      ice_get_hw_vsi_num(hw,
+								 vsi_handle),
+					      prof->id);
+		if (!status)
+			set_bit(vsi_handle, prof->vsis);
+		else
+			ice_debug(hw, ICE_DBG_FLOW,
+				  "HW profile add failed, %d\n",
+				  status);
+	}
+
+	return status;
+}
+
+/**
+ * ice_flow_disassoc_prof - disassociate a VSI from a flow profile
+ * @hw: pointer to the hardware structure
+ * @blk: classification stage
+ * @prof: pointer to flow profile
+ * @vsi_handle: software VSI handle
+ *
+ * Assumption: the caller has acquired the lock to the profile list
+ * and the software VSI handle has been validated
+ */
+static enum ice_status
+ice_flow_disassoc_prof(struct ice_hw *hw, enum ice_block blk,
+		       struct ice_flow_prof *prof, u16 vsi_handle)
+{
+	enum ice_status status = 0;
+
+	if (test_bit(vsi_handle, prof->vsis)) {
+		status = ice_rem_prof_id_flow(hw, blk,
+					      ice_get_hw_vsi_num(hw,
+								 vsi_handle),
+					      prof->id);
+		if (!status)
+			clear_bit(vsi_handle, prof->vsis);
+		else
+			ice_debug(hw, ICE_DBG_FLOW,
+				  "HW profile remove failed, %d\n",
+				  status);
+	}
+
+	return status;
+}
+
+/**
+ * ice_flow_add_prof - Add a flow profile for packet segments and matched fields
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @dir: flow direction
+ * @prof_id: unique ID to identify this flow profile
+ * @segs: array of one or more packet segments that describe the flow
+ * @segs_cnt: number of packet segments provided
+ * @prof: stores the returned flow profile added
+ */
+enum ice_status
+ice_flow_add_prof(struct ice_hw *hw, enum ice_block blk, enum ice_flow_dir dir,
+		  u64 prof_id, struct ice_flow_seg_info *segs, u8 segs_cnt,
+		  struct ice_flow_prof **prof)
+{
+	enum ice_status status;
+
+	if (segs_cnt > ICE_FLOW_SEG_MAX)
+		return ICE_ERR_MAX_LIMIT;
+
+	if (!segs_cnt)
+		return ICE_ERR_PARAM;
+
+	if (!segs)
+		return ICE_ERR_BAD_PTR;
+
+	status = ice_flow_val_hdrs(segs, segs_cnt);
+	if (status)
+		return status;
+
+	mutex_lock(&hw->fl_profs_locks[blk]);
+
+	status = ice_flow_add_prof_sync(hw, blk, dir, prof_id, segs, segs_cnt,
+					prof);
+	if (!status)
+		list_add(&(*prof)->l_entry, &hw->fl_profs[blk]);
+
+	mutex_unlock(&hw->fl_profs_locks[blk]);
+
+	return status;
+}
+
+/**
+ * ice_flow_rem_prof - Remove a flow profile and all entries associated with it
+ * @hw: pointer to the HW struct
+ * @blk: the block for which the flow profile is to be removed
+ * @prof_id: unique ID of the flow profile to be removed
+ */
+enum ice_status
+ice_flow_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 prof_id)
+{
+	struct ice_flow_prof *prof;
+	enum ice_status status;
+
+	mutex_lock(&hw->fl_profs_locks[blk]);
+
+	prof = ice_flow_find_prof_id(hw, blk, prof_id);
+	if (!prof) {
+		status = ICE_ERR_DOES_NOT_EXIST;
+		goto out;
+	}
+
+	/* prof becomes invalid after the call */
+	status = ice_flow_rem_prof_sync(hw, blk, prof);
+
+out:
+	mutex_unlock(&hw->fl_profs_locks[blk]);
+
+	return status;
+}
+
+/**
+ * ice_flow_add_entry - Add a flow entry
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @prof_id: ID of the profile to add a new flow entry to
+ * @entry_id: unique ID to identify this flow entry
+ * @vsi_handle: software VSI handle for the flow entry
+ * @prio: priority of the flow entry
+ * @data: pointer to a data buffer containing flow entry's match values/masks
+ * @entry_h: pointer to buffer that receives the new flow entry's handle
+ */
+enum ice_status
+ice_flow_add_entry(struct ice_hw *hw, enum ice_block blk, u64 prof_id,
+		   u64 entry_id, u16 vsi_handle, enum ice_flow_priority prio,
+		   void *data, u64 *entry_h)
+{
+	struct ice_flow_entry *e = NULL;
+	struct ice_flow_prof *prof;
+	enum ice_status status;
+
+	/* No flow entry data is expected for RSS */
+	if (!entry_h || (!data && blk != ICE_BLK_RSS))
+		return ICE_ERR_BAD_PTR;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+
+	mutex_lock(&hw->fl_profs_locks[blk]);
+
+	prof = ice_flow_find_prof_id(hw, blk, prof_id);
+	if (!prof) {
+		status = ICE_ERR_DOES_NOT_EXIST;
+	} else {
+		/* Allocate memory for the entry being added and associate
+		 * the VSI to the found flow profile
+		 */
+		e = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*e), GFP_KERNEL);
+		if (!e)
+			status = ICE_ERR_NO_MEMORY;
+		else
+			status = ice_flow_assoc_prof(hw, blk, prof, vsi_handle);
+	}
+
+	mutex_unlock(&hw->fl_profs_locks[blk]);
+	if (status)
+		goto out;
+
+	e->id = entry_id;
+	e->vsi_handle = vsi_handle;
+	e->prof = prof;
+	e->priority = prio;
+
+	switch (blk) {
+	case ICE_BLK_FD:
+	case ICE_BLK_RSS:
+		break;
+	default:
+		status = ICE_ERR_NOT_IMPL;
+		goto out;
+	}
+
+	mutex_lock(&prof->entries_lock);
+	list_add(&e->l_entry, &prof->entries);
+	mutex_unlock(&prof->entries_lock);
+
+	*entry_h = ICE_FLOW_ENTRY_HNDL(e);
+
+out:
+	if (status && e) {
+		if (e->entry)
+			devm_kfree(ice_hw_to_dev(hw), e->entry);
+		devm_kfree(ice_hw_to_dev(hw), e);
+	}
+
+	return status;
+}
+
+/**
+ * ice_flow_rem_entry - Remove a flow entry
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @entry_h: handle to the flow entry to be removed
+ */
+enum ice_status ice_flow_rem_entry(struct ice_hw *hw, enum ice_block blk,
+				   u64 entry_h)
+{
+	struct ice_flow_entry *entry;
+	struct ice_flow_prof *prof;
+	enum ice_status status = 0;
+
+	if (entry_h == ICE_FLOW_ENTRY_HANDLE_INVAL)
+		return ICE_ERR_PARAM;
+
+	entry = ICE_FLOW_ENTRY_PTR(entry_h);
+
+	/* Retain the pointer to the flow profile as the entry will be freed */
+	prof = entry->prof;
+
+	if (prof) {
+		mutex_lock(&prof->entries_lock);
+		status = ice_flow_rem_entry_sync(hw, blk, entry);
+		mutex_unlock(&prof->entries_lock);
+	}
+
+	return status;
+}
+
+/**
+ * ice_flow_set_fld_ext - specifies locations of field from entry's input buffer
+ * @seg: packet segment the field being set belongs to
+ * @fld: field to be set
+ * @field_type: type of the field
+ * @val_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of the value to match from
+ *           entry's input buffer
+ * @mask_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of mask value from entry's
+ *            input buffer
+ * @last_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of last/upper value from
+ *            entry's input buffer
+ *
+ * This helper function stores information of a field being matched, including
+ * the type of the field and the locations of the value to match, the mask, and
+ * the upper-bound value in the start of the input buffer for a flow entry.
+ * This function should only be used for fixed-size data structures.
+ *
+ * This function also opportunistically determines the protocol headers to be
+ * present based on the fields being set. Some fields cannot be used alone to
+ * determine the protocol headers present. Sometimes, fields for particular
+ * protocol headers are not matched. In those cases, the protocol headers
+ * must be explicitly set.
+ */
+static void
+ice_flow_set_fld_ext(struct ice_flow_seg_info *seg, enum ice_flow_field fld,
+		     enum ice_flow_fld_match_type field_type, u16 val_loc,
+		     u16 mask_loc, u16 last_loc)
+{
+	u64 bit = BIT_ULL(fld);
+
+	seg->match |= bit;
+	if (field_type == ICE_FLOW_FLD_TYPE_RANGE)
+		seg->range |= bit;
+
+	seg->fields[fld].type = field_type;
+	seg->fields[fld].src.val = val_loc;
+	seg->fields[fld].src.mask = mask_loc;
+	seg->fields[fld].src.last = last_loc;
+
+	ICE_FLOW_SET_HDRS(seg, ice_flds_info[fld].hdr);
+}
+
+/**
+ * ice_flow_set_fld - specifies locations of field from entry's input buffer
+ * @seg: packet segment the field being set belongs to
+ * @fld: field to be set
+ * @val_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of the value to match from
+ *           entry's input buffer
+ * @mask_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of mask value from entry's
+ *            input buffer
+ * @last_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of last/upper value from
+ *            entry's input buffer
+ * @range: indicate if field being matched is to be in a range
+ *
+ * This function specifies the locations, in the form of byte offsets from the
+ * start of the input buffer for a flow entry, from where the value to match,
+ * the mask value, and upper value can be extracted. These locations are then
+ * stored in the flow profile. When adding a flow entry associated with the
+ * flow profile, these locations will be used to quickly extract the values and
+ * create the content of a match entry. This function should only be used for
+ * fixed-size data structures.
+ */
+void
+ice_flow_set_fld(struct ice_flow_seg_info *seg, enum ice_flow_field fld,
+		 u16 val_loc, u16 mask_loc, u16 last_loc, bool range)
+{
+	enum ice_flow_fld_match_type t = range ?
+		ICE_FLOW_FLD_TYPE_RANGE : ICE_FLOW_FLD_TYPE_REG;
+
+	ice_flow_set_fld_ext(seg, fld, t, val_loc, mask_loc, last_loc);
+}
+
+/**
+ * ice_flow_add_fld_raw - sets locations of a raw field from entry's input buf
+ * @seg: packet segment the field being set belongs to
+ * @off: offset of the raw field from the beginning of the segment in bytes
+ * @len: length of the raw pattern to be matched
+ * @val_loc: location of the value to match from entry's input buffer
+ * @mask_loc: location of mask value from entry's input buffer
+ *
+ * This function specifies the offset of the raw field to be match from the
+ * beginning of the specified packet segment, and the locations, in the form of
+ * byte offsets from the start of the input buffer for a flow entry, from where
+ * the value to match and the mask value to be extracted. These locations are
+ * then stored in the flow profile. When adding flow entries to the associated
+ * flow profile, these locations can be used to quickly extract the values to
+ * create the content of a match entry. This function should only be used for
+ * fixed-size data structures.
+ */
+void
+ice_flow_add_fld_raw(struct ice_flow_seg_info *seg, u16 off, u8 len,
+		     u16 val_loc, u16 mask_loc)
+{
+	if (seg->raws_cnt < ICE_FLOW_SEG_RAW_FLD_MAX) {
+		seg->raws[seg->raws_cnt].off = off;
+		seg->raws[seg->raws_cnt].info.type = ICE_FLOW_FLD_TYPE_SIZE;
+		seg->raws[seg->raws_cnt].info.src.val = val_loc;
+		seg->raws[seg->raws_cnt].info.src.mask = mask_loc;
+		/* The "last" field is used to store the length of the field */
+		seg->raws[seg->raws_cnt].info.src.last = len;
+	}
+
+	/* Overflows of "raws" will be handled as an error condition later in
+	 * the flow when this information is processed.
+	 */
+	seg->raws_cnt++;
+}
+
+#define ICE_FLOW_RSS_SEG_HDR_L3_MASKS \
+	(ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6)
+
+#define ICE_FLOW_RSS_SEG_HDR_L4_MASKS \
+	(ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_SCTP)
+
+#define ICE_FLOW_RSS_SEG_HDR_VAL_MASKS \
+	(ICE_FLOW_RSS_SEG_HDR_L3_MASKS | \
+	 ICE_FLOW_RSS_SEG_HDR_L4_MASKS)
+
+/**
+ * ice_flow_set_rss_seg_info - setup packet segments for RSS
+ * @segs: pointer to the flow field segment(s)
+ * @hash_fields: fields to be hashed on for the segment(s)
+ * @flow_hdr: protocol header fields within a packet segment
+ *
+ * Helper function to extract fields from hash bitmap and use flow
+ * header value to set flow field segment for further use in flow
+ * profile entry or removal.
+ */
+static enum ice_status
+ice_flow_set_rss_seg_info(struct ice_flow_seg_info *segs, u64 hash_fields,
+			  u32 flow_hdr)
+{
+	u64 val;
+	u8 i;
+
+	for_each_set_bit(i, (unsigned long *)&hash_fields,
+			 ICE_FLOW_FIELD_IDX_MAX)
+		ice_flow_set_fld(segs, (enum ice_flow_field)i,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+
+	ICE_FLOW_SET_HDRS(segs, flow_hdr);
+
+	if (segs->hdrs & ~ICE_FLOW_RSS_SEG_HDR_VAL_MASKS)
+		return ICE_ERR_PARAM;
+
+	val = (u64)(segs->hdrs & ICE_FLOW_RSS_SEG_HDR_L3_MASKS);
+	if (val && !is_power_of_2(val))
+		return ICE_ERR_CFG;
+
+	val = (u64)(segs->hdrs & ICE_FLOW_RSS_SEG_HDR_L4_MASKS);
+	if (val && !is_power_of_2(val))
+		return ICE_ERR_CFG;
+
+	return 0;
+}
+
+/**
+ * ice_rem_vsi_rss_list - remove VSI from RSS list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ *
+ * Remove the VSI from all RSS configurations in the list.
+ */
+void ice_rem_vsi_rss_list(struct ice_hw *hw, u16 vsi_handle)
+{
+	struct ice_rss_cfg *r, *tmp;
+
+	if (list_empty(&hw->rss_list_head))
+		return;
+
+	mutex_lock(&hw->rss_locks);
+	list_for_each_entry_safe(r, tmp, &hw->rss_list_head, l_entry)
+		if (test_and_clear_bit(vsi_handle, r->vsis))
+			if (bitmap_empty(r->vsis, ICE_MAX_VSI)) {
+				list_del(&r->l_entry);
+				devm_kfree(ice_hw_to_dev(hw), r);
+			}
+	mutex_unlock(&hw->rss_locks);
+}
+
+/**
+ * ice_rem_vsi_rss_cfg - remove RSS configurations associated with VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ *
+ * This function will iterate through all flow profiles and disassociate
+ * the VSI from that profile. If the flow profile has no VSIs it will
+ * be removed.
+ */
+enum ice_status ice_rem_vsi_rss_cfg(struct ice_hw *hw, u16 vsi_handle)
+{
+	const enum ice_block blk = ICE_BLK_RSS;
+	struct ice_flow_prof *p, *t;
+	enum ice_status status = 0;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+
+	if (list_empty(&hw->fl_profs[blk]))
+		return 0;
+
+	mutex_lock(&hw->rss_locks);
+	list_for_each_entry_safe(p, t, &hw->fl_profs[blk], l_entry)
+		if (test_bit(vsi_handle, p->vsis)) {
+			status = ice_flow_disassoc_prof(hw, blk, p, vsi_handle);
+			if (status)
+				break;
+
+			if (bitmap_empty(p->vsis, ICE_MAX_VSI)) {
+				status = ice_flow_rem_prof(hw, blk, p->id);
+				if (status)
+					break;
+			}
+		}
+	mutex_unlock(&hw->rss_locks);
+
+	return status;
+}
+
+/**
+ * ice_rem_rss_list - remove RSS configuration from list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @prof: pointer to flow profile
+ *
+ * Assumption: lock has already been acquired for RSS list
+ */
+static void
+ice_rem_rss_list(struct ice_hw *hw, u16 vsi_handle, struct ice_flow_prof *prof)
+{
+	struct ice_rss_cfg *r, *tmp;
+
+	/* Search for RSS hash fields associated to the VSI that match the
+	 * hash configurations associated to the flow profile. If found
+	 * remove from the RSS entry list of the VSI context and delete entry.
+	 */
+	list_for_each_entry_safe(r, tmp, &hw->rss_list_head, l_entry)
+		if (r->hashed_flds == prof->segs[prof->segs_cnt - 1].match &&
+		    r->packet_hdr == prof->segs[prof->segs_cnt - 1].hdrs) {
+			clear_bit(vsi_handle, r->vsis);
+			if (bitmap_empty(r->vsis, ICE_MAX_VSI)) {
+				list_del(&r->l_entry);
+				devm_kfree(ice_hw_to_dev(hw), r);
+			}
+			return;
+		}
+}
+
+/**
+ * ice_add_rss_list - add RSS configuration to list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @prof: pointer to flow profile
+ *
+ * Assumption: lock has already been acquired for RSS list
+ */
+static enum ice_status
+ice_add_rss_list(struct ice_hw *hw, u16 vsi_handle, struct ice_flow_prof *prof)
+{
+	struct ice_rss_cfg *r, *rss_cfg;
+
+	list_for_each_entry(r, &hw->rss_list_head, l_entry)
+		if (r->hashed_flds == prof->segs[prof->segs_cnt - 1].match &&
+		    r->packet_hdr == prof->segs[prof->segs_cnt - 1].hdrs) {
+			set_bit(vsi_handle, r->vsis);
+			return 0;
+		}
+
+	rss_cfg = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*rss_cfg),
+			       GFP_KERNEL);
+	if (!rss_cfg)
+		return ICE_ERR_NO_MEMORY;
+
+	rss_cfg->hashed_flds = prof->segs[prof->segs_cnt - 1].match;
+	rss_cfg->packet_hdr = prof->segs[prof->segs_cnt - 1].hdrs;
+	set_bit(vsi_handle, rss_cfg->vsis);
+
+	list_add_tail(&rss_cfg->l_entry, &hw->rss_list_head);
+
+	return 0;
+}
+
+#define ICE_FLOW_PROF_HASH_S	0
+#define ICE_FLOW_PROF_HASH_M	(0xFFFFFFFFULL << ICE_FLOW_PROF_HASH_S)
+#define ICE_FLOW_PROF_HDR_S	32
+#define ICE_FLOW_PROF_HDR_M	(0x3FFFFFFFULL << ICE_FLOW_PROF_HDR_S)
+#define ICE_FLOW_PROF_ENCAP_S	63
+#define ICE_FLOW_PROF_ENCAP_M	(BIT_ULL(ICE_FLOW_PROF_ENCAP_S))
+
+#define ICE_RSS_OUTER_HEADERS	1
+#define ICE_RSS_INNER_HEADERS	2
+
+/* Flow profile ID format:
+ * [0:31] - Packet match fields
+ * [32:62] - Protocol header
+ * [63] - Encapsulation flag, 0 if non-tunneled, 1 if tunneled
+ */
+#define ICE_FLOW_GEN_PROFID(hash, hdr, segs_cnt) \
+	(u64)(((u64)(hash) & ICE_FLOW_PROF_HASH_M) | \
+	      (((u64)(hdr) << ICE_FLOW_PROF_HDR_S) & ICE_FLOW_PROF_HDR_M) | \
+	      ((u8)((segs_cnt) - 1) ? ICE_FLOW_PROF_ENCAP_M : 0))
+
+/**
+ * ice_add_rss_cfg_sync - add an RSS configuration
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @hashed_flds: hash bit fields (ICE_FLOW_HASH_*) to configure
+ * @addl_hdrs: protocol header fields
+ * @segs_cnt: packet segment count
+ *
+ * Assumption: lock has already been acquired for RSS list
+ */
+static enum ice_status
+ice_add_rss_cfg_sync(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+		     u32 addl_hdrs, u8 segs_cnt)
+{
+	const enum ice_block blk = ICE_BLK_RSS;
+	struct ice_flow_prof *prof = NULL;
+	struct ice_flow_seg_info *segs;
+	enum ice_status status;
+
+	if (!segs_cnt || segs_cnt > ICE_FLOW_SEG_MAX)
+		return ICE_ERR_PARAM;
+
+	segs = kcalloc(segs_cnt, sizeof(*segs), GFP_KERNEL);
+	if (!segs)
+		return ICE_ERR_NO_MEMORY;
+
+	/* Construct the packet segment info from the hashed fields */
+	status = ice_flow_set_rss_seg_info(&segs[segs_cnt - 1], hashed_flds,
+					   addl_hdrs);
+	if (status)
+		goto exit;
+
+	/* Search for a flow profile that has matching headers, hash fields
+	 * and has the input VSI associated to it. If found, no further
+	 * operations required and exit.
+	 */
+	prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt,
+					vsi_handle,
+					ICE_FLOW_FIND_PROF_CHK_FLDS |
+					ICE_FLOW_FIND_PROF_CHK_VSI);
+	if (prof)
+		goto exit;
+
+	/* Check if a flow profile exists with the same protocol headers and
+	 * associated with the input VSI. If so disassociate the VSI from
+	 * this profile. The VSI will be added to a new profile created with
+	 * the protocol header and new hash field configuration.
+	 */
+	prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt,
+					vsi_handle, ICE_FLOW_FIND_PROF_CHK_VSI);
+	if (prof) {
+		status = ice_flow_disassoc_prof(hw, blk, prof, vsi_handle);
+		if (!status)
+			ice_rem_rss_list(hw, vsi_handle, prof);
+		else
+			goto exit;
+
+		/* Remove profile if it has no VSIs associated */
+		if (bitmap_empty(prof->vsis, ICE_MAX_VSI)) {
+			status = ice_flow_rem_prof(hw, blk, prof->id);
+			if (status)
+				goto exit;
+		}
+	}
+
+	/* Search for a profile that has same match fields only. If this
+	 * exists then associate the VSI to this profile.
+	 */
+	prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt,
+					vsi_handle,
+					ICE_FLOW_FIND_PROF_CHK_FLDS);
+	if (prof) {
+		status = ice_flow_assoc_prof(hw, blk, prof, vsi_handle);
+		if (!status)
+			status = ice_add_rss_list(hw, vsi_handle, prof);
+		goto exit;
+	}
+
+	/* Create a new flow profile with generated profile and packet
+	 * segment information.
+	 */
+	status = ice_flow_add_prof(hw, blk, ICE_FLOW_RX,
+				   ICE_FLOW_GEN_PROFID(hashed_flds,
+						       segs[segs_cnt - 1].hdrs,
+						       segs_cnt),
+				   segs, segs_cnt, &prof);
+	if (status)
+		goto exit;
+
+	status = ice_flow_assoc_prof(hw, blk, prof, vsi_handle);
+	/* If association to a new flow profile failed then this profile can
+	 * be removed.
+	 */
+	if (status) {
+		ice_flow_rem_prof(hw, blk, prof->id);
+		goto exit;
+	}
+
+	status = ice_add_rss_list(hw, vsi_handle, prof);
+
+exit:
+	kfree(segs);
+	return status;
+}
+
+/**
+ * ice_add_rss_cfg - add an RSS configuration with specified hashed fields
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @hashed_flds: hash bit fields (ICE_FLOW_HASH_*) to configure
+ * @addl_hdrs: protocol header fields
+ *
+ * This function will generate a flow profile based on fields associated with
+ * the input fields to hash on, the flow type and use the VSI number to add
+ * a flow entry to the profile.
+ */
+enum ice_status
+ice_add_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+		u32 addl_hdrs)
+{
+	enum ice_status status;
+
+	if (hashed_flds == ICE_HASH_INVALID ||
+	    !ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+
+	mutex_lock(&hw->rss_locks);
+	status = ice_add_rss_cfg_sync(hw, vsi_handle, hashed_flds, addl_hdrs,
+				      ICE_RSS_OUTER_HEADERS);
+	if (!status)
+		status = ice_add_rss_cfg_sync(hw, vsi_handle, hashed_flds,
+					      addl_hdrs, ICE_RSS_INNER_HEADERS);
+	mutex_unlock(&hw->rss_locks);
+
+	return status;
+}
+
+/* Mapping of AVF hash bit fields to an L3-L4 hash combination.
+ * As the ice_flow_avf_hdr_field represent individual bit shifts in a hash,
+ * convert its values to their appropriate flow L3, L4 values.
+ */
+#define ICE_FLOW_AVF_RSS_IPV4_MASKS \
+	(BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_OTHER) | \
+	 BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV4))
+#define ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS \
+	(BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP_SYN_NO_ACK) | \
+	 BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP))
+#define ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS \
+	(BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV4_UDP) | \
+	 BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV4_UDP) | \
+	 BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_UDP))
+#define ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS \
+	(ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS | ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS | \
+	 ICE_FLOW_AVF_RSS_IPV4_MASKS | BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP))
+
+#define ICE_FLOW_AVF_RSS_IPV6_MASKS \
+	(BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_OTHER) | \
+	 BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV6))
+#define ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS \
+	(BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV6_UDP) | \
+	 BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV6_UDP) | \
+	 BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_UDP))
+#define ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS \
+	(BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP_SYN_NO_ACK) | \
+	 BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP))
+#define ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS \
+	(ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS | ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS | \
+	 ICE_FLOW_AVF_RSS_IPV6_MASKS | BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP))
+
+/**
+ * ice_add_avf_rss_cfg - add an RSS configuration for AVF driver
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @avf_hash: hash bit fields (ICE_AVF_FLOW_FIELD_*) to configure
+ *
+ * This function will take the hash bitmap provided by the AVF driver via a
+ * message, convert it to ICE-compatible values, and configure RSS flow
+ * profiles.
+ */
+enum ice_status
+ice_add_avf_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 avf_hash)
+{
+	enum ice_status status = 0;
+	u64 hash_flds;
+
+	if (avf_hash == ICE_AVF_FLOW_FIELD_INVALID ||
+	    !ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+
+	/* Make sure no unsupported bits are specified */
+	if (avf_hash & ~(ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS |
+			 ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS))
+		return ICE_ERR_CFG;
+
+	hash_flds = avf_hash;
+
+	/* Always create an L3 RSS configuration for any L4 RSS configuration */
+	if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS)
+		hash_flds |= ICE_FLOW_AVF_RSS_IPV4_MASKS;
+
+	if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS)
+		hash_flds |= ICE_FLOW_AVF_RSS_IPV6_MASKS;
+
+	/* Create the corresponding RSS configuration for each valid hash bit */
+	while (hash_flds) {
+		u64 rss_hash = ICE_HASH_INVALID;
+
+		if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS) {
+			if (hash_flds & ICE_FLOW_AVF_RSS_IPV4_MASKS) {
+				rss_hash = ICE_FLOW_HASH_IPV4;
+				hash_flds &= ~ICE_FLOW_AVF_RSS_IPV4_MASKS;
+			} else if (hash_flds &
+				   ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS) {
+				rss_hash = ICE_FLOW_HASH_IPV4 |
+					ICE_FLOW_HASH_TCP_PORT;
+				hash_flds &= ~ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS;
+			} else if (hash_flds &
+				   ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS) {
+				rss_hash = ICE_FLOW_HASH_IPV4 |
+					ICE_FLOW_HASH_UDP_PORT;
+				hash_flds &= ~ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS;
+			} else if (hash_flds &
+				   BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP)) {
+				rss_hash = ICE_FLOW_HASH_IPV4 |
+					ICE_FLOW_HASH_SCTP_PORT;
+				hash_flds &=
+					~BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP);
+			}
+		} else if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS) {
+			if (hash_flds & ICE_FLOW_AVF_RSS_IPV6_MASKS) {
+				rss_hash = ICE_FLOW_HASH_IPV6;
+				hash_flds &= ~ICE_FLOW_AVF_RSS_IPV6_MASKS;
+			} else if (hash_flds &
+				   ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS) {
+				rss_hash = ICE_FLOW_HASH_IPV6 |
+					ICE_FLOW_HASH_TCP_PORT;
+				hash_flds &= ~ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS;
+			} else if (hash_flds &
+				   ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS) {
+				rss_hash = ICE_FLOW_HASH_IPV6 |
+					ICE_FLOW_HASH_UDP_PORT;
+				hash_flds &= ~ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS;
+			} else if (hash_flds &
+				   BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP)) {
+				rss_hash = ICE_FLOW_HASH_IPV6 |
+					ICE_FLOW_HASH_SCTP_PORT;
+				hash_flds &=
+					~BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP);
+			}
+		}
+
+		if (rss_hash == ICE_HASH_INVALID)
+			return ICE_ERR_OUT_OF_RANGE;
+
+		status = ice_add_rss_cfg(hw, vsi_handle, rss_hash,
+					 ICE_FLOW_SEG_HDR_NONE);
+		if (status)
+			break;
+	}
+
+	return status;
+}
+
+/**
+ * ice_replay_rss_cfg - replay RSS configurations associated with VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ */
+enum ice_status ice_replay_rss_cfg(struct ice_hw *hw, u16 vsi_handle)
+{
+	enum ice_status status = 0;
+	struct ice_rss_cfg *r;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+
+	mutex_lock(&hw->rss_locks);
+	list_for_each_entry(r, &hw->rss_list_head, l_entry) {
+		if (test_bit(vsi_handle, r->vsis)) {
+			status = ice_add_rss_cfg_sync(hw, vsi_handle,
+						      r->hashed_flds,
+						      r->packet_hdr,
+						      ICE_RSS_OUTER_HEADERS);
+			if (status)
+				break;
+			status = ice_add_rss_cfg_sync(hw, vsi_handle,
+						      r->hashed_flds,
+						      r->packet_hdr,
+						      ICE_RSS_INNER_HEADERS);
+			if (status)
+				break;
+		}
+	}
+	mutex_unlock(&hw->rss_locks);
+
+	return status;
+}
+
+/**
+ * ice_get_rss_cfg - returns hashed fields for the given header types
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @hdrs: protocol header type
+ *
+ * This function will return the match fields of the first instance of flow
+ * profile having the given header types and containing input VSI
+ */
+u64 ice_get_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u32 hdrs)
+{
+	u64 rss_hash = ICE_HASH_INVALID;
+	struct ice_rss_cfg *r;
+
+	/* verify if the protocol header is non zero and VSI is valid */
+	if (hdrs == ICE_FLOW_SEG_HDR_NONE || !ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_HASH_INVALID;
+
+	mutex_lock(&hw->rss_locks);
+	list_for_each_entry(r, &hw->rss_list_head, l_entry)
+		if (test_bit(vsi_handle, r->vsis) &&
+		    r->packet_hdr == hdrs) {
+			rss_hash = r->hashed_flds;
+			break;
+		}
+	mutex_unlock(&hw->rss_locks);
+
+	return rss_hash;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h
new file mode 100644
index 000000000..829f90b1e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flow.h
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_FLOW_H_
+#define _ICE_FLOW_H_
+
+#define ICE_FLOW_ENTRY_HANDLE_INVAL	0
+#define ICE_FLOW_FLD_OFF_INVAL		0xffff
+
+/* Generate flow hash field from flow field type(s) */
+#define ICE_FLOW_HASH_IPV4	\
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | \
+	 BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA))
+#define ICE_FLOW_HASH_IPV6	\
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) | \
+	 BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA))
+#define ICE_FLOW_HASH_TCP_PORT	\
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT) | \
+	 BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT))
+#define ICE_FLOW_HASH_UDP_PORT	\
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT) | \
+	 BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT))
+#define ICE_FLOW_HASH_SCTP_PORT	\
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT) | \
+	 BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT))
+
+#define ICE_HASH_INVALID	0
+#define ICE_HASH_TCP_IPV4	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_TCP_PORT)
+#define ICE_HASH_TCP_IPV6	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_TCP_PORT)
+#define ICE_HASH_UDP_IPV4	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_UDP_PORT)
+#define ICE_HASH_UDP_IPV6	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_UDP_PORT)
+
+/* Protocol header fields within a packet segment. A segment consists of one or
+ * more protocol headers that make up a logical group of protocol headers. Each
+ * logical group of protocol headers encapsulates or is encapsulated using/by
+ * tunneling or encapsulation protocols for network virtualization such as GRE,
+ * VxLAN, etc.
+ */
+enum ice_flow_seg_hdr {
+	ICE_FLOW_SEG_HDR_NONE		= 0x00000000,
+	ICE_FLOW_SEG_HDR_IPV4		= 0x00000004,
+	ICE_FLOW_SEG_HDR_IPV6		= 0x00000008,
+	ICE_FLOW_SEG_HDR_TCP		= 0x00000040,
+	ICE_FLOW_SEG_HDR_UDP		= 0x00000080,
+	ICE_FLOW_SEG_HDR_SCTP		= 0x00000100,
+	ICE_FLOW_SEG_HDR_GRE		= 0x00000200,
+};
+
+enum ice_flow_field {
+	/* L3 */
+	ICE_FLOW_FIELD_IDX_IPV4_SA,
+	ICE_FLOW_FIELD_IDX_IPV4_DA,
+	ICE_FLOW_FIELD_IDX_IPV6_SA,
+	ICE_FLOW_FIELD_IDX_IPV6_DA,
+	/* L4 */
+	ICE_FLOW_FIELD_IDX_TCP_SRC_PORT,
+	ICE_FLOW_FIELD_IDX_TCP_DST_PORT,
+	ICE_FLOW_FIELD_IDX_UDP_SRC_PORT,
+	ICE_FLOW_FIELD_IDX_UDP_DST_PORT,
+	ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT,
+	ICE_FLOW_FIELD_IDX_SCTP_DST_PORT,
+	/* GRE */
+	ICE_FLOW_FIELD_IDX_GRE_KEYID,
+	/* The total number of enums must not exceed 64 */
+	ICE_FLOW_FIELD_IDX_MAX
+};
+
+/* Flow headers and fields for AVF support */
+enum ice_flow_avf_hdr_field {
+	/* Values 0 - 28 are reserved for future use */
+	ICE_AVF_FLOW_FIELD_INVALID		= 0,
+	ICE_AVF_FLOW_FIELD_UNICAST_IPV4_UDP	= 29,
+	ICE_AVF_FLOW_FIELD_MULTICAST_IPV4_UDP,
+	ICE_AVF_FLOW_FIELD_IPV4_UDP,
+	ICE_AVF_FLOW_FIELD_IPV4_TCP_SYN_NO_ACK,
+	ICE_AVF_FLOW_FIELD_IPV4_TCP,
+	ICE_AVF_FLOW_FIELD_IPV4_SCTP,
+	ICE_AVF_FLOW_FIELD_IPV4_OTHER,
+	ICE_AVF_FLOW_FIELD_FRAG_IPV4,
+	/* Values 37-38 are reserved */
+	ICE_AVF_FLOW_FIELD_UNICAST_IPV6_UDP	= 39,
+	ICE_AVF_FLOW_FIELD_MULTICAST_IPV6_UDP,
+	ICE_AVF_FLOW_FIELD_IPV6_UDP,
+	ICE_AVF_FLOW_FIELD_IPV6_TCP_SYN_NO_ACK,
+	ICE_AVF_FLOW_FIELD_IPV6_TCP,
+	ICE_AVF_FLOW_FIELD_IPV6_SCTP,
+	ICE_AVF_FLOW_FIELD_IPV6_OTHER,
+	ICE_AVF_FLOW_FIELD_FRAG_IPV6,
+	ICE_AVF_FLOW_FIELD_RSVD47,
+	ICE_AVF_FLOW_FIELD_FCOE_OX,
+	ICE_AVF_FLOW_FIELD_FCOE_RX,
+	ICE_AVF_FLOW_FIELD_FCOE_OTHER,
+	/* Values 51-62 are reserved */
+	ICE_AVF_FLOW_FIELD_L2_PAYLOAD		= 63,
+	ICE_AVF_FLOW_FIELD_MAX
+};
+
+/* Supported RSS offloads  This macro is defined to support
+ * VIRTCHNL_OP_GET_RSS_HENA_CAPS ops. PF driver sends the RSS hardware
+ * capabilities to the caller of this ops.
+ */
+#define ICE_DEFAULT_RSS_HENA ( \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_UDP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_OTHER) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV4) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_UDP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_OTHER) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV6) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP_SYN_NO_ACK) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV4_UDP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV4_UDP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP_SYN_NO_ACK) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV6_UDP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV6_UDP))
+
+enum ice_flow_dir {
+	ICE_FLOW_RX		= 0x02,
+};
+
+enum ice_flow_priority {
+	ICE_FLOW_PRIO_LOW,
+	ICE_FLOW_PRIO_NORMAL,
+	ICE_FLOW_PRIO_HIGH
+};
+
+#define ICE_FLOW_SEG_MAX		2
+#define ICE_FLOW_SEG_RAW_FLD_MAX	2
+#define ICE_FLOW_FV_EXTRACT_SZ		2
+
+#define ICE_FLOW_SET_HDRS(seg, val)	((seg)->hdrs |= (u32)(val))
+
+struct ice_flow_seg_xtrct {
+	u8 prot_id;	/* Protocol ID of extracted header field */
+	u16 off;	/* Starting offset of the field in header in bytes */
+	u8 idx;		/* Index of FV entry used */
+	u8 disp;	/* Displacement of field in bits fr. FV entry's start */
+};
+
+enum ice_flow_fld_match_type {
+	ICE_FLOW_FLD_TYPE_REG,		/* Value, mask */
+	ICE_FLOW_FLD_TYPE_RANGE,	/* Value, mask, last (upper bound) */
+	ICE_FLOW_FLD_TYPE_PREFIX,	/* IP address, prefix, size of prefix */
+	ICE_FLOW_FLD_TYPE_SIZE,		/* Value, mask, size of match */
+};
+
+struct ice_flow_fld_loc {
+	/* Describe offsets of field information relative to the beginning of
+	 * input buffer provided when adding flow entries.
+	 */
+	u16 val;	/* Offset where the value is located */
+	u16 mask;	/* Offset where the mask/prefix value is located */
+	u16 last;	/* Length or offset where the upper value is located */
+};
+
+struct ice_flow_fld_info {
+	enum ice_flow_fld_match_type type;
+	/* Location where to retrieve data from an input buffer */
+	struct ice_flow_fld_loc src;
+	/* Location where to put the data into the final entry buffer */
+	struct ice_flow_fld_loc entry;
+	struct ice_flow_seg_xtrct xtrct;
+};
+
+struct ice_flow_seg_fld_raw {
+	struct ice_flow_fld_info info;
+	u16 off;	/* Offset from the start of the segment */
+};
+
+struct ice_flow_seg_info {
+	u32 hdrs;	/* Bitmask indicating protocol headers present */
+	u64 match;	/* Bitmask indicating header fields to be matched */
+	u64 range;	/* Bitmask indicating header fields matched as ranges */
+
+	struct ice_flow_fld_info fields[ICE_FLOW_FIELD_IDX_MAX];
+
+	u8 raws_cnt;	/* Number of raw fields to be matched */
+	struct ice_flow_seg_fld_raw raws[ICE_FLOW_SEG_RAW_FLD_MAX];
+};
+
+/* This structure describes a flow entry, and is tracked only in this file */
+struct ice_flow_entry {
+	struct list_head l_entry;
+
+	u64 id;
+	struct ice_flow_prof *prof;
+	/* Flow entry's content */
+	void *entry;
+	enum ice_flow_priority priority;
+	u16 vsi_handle;
+	u16 entry_sz;
+};
+
+#define ICE_FLOW_ENTRY_HNDL(e)	((u64)(uintptr_t)e)
+#define ICE_FLOW_ENTRY_PTR(h)	((struct ice_flow_entry *)(uintptr_t)(h))
+
+struct ice_flow_prof {
+	struct list_head l_entry;
+
+	u64 id;
+	enum ice_flow_dir dir;
+	u8 segs_cnt;
+
+	/* Keep track of flow entries associated with this flow profile */
+	struct mutex entries_lock;
+	struct list_head entries;
+
+	struct ice_flow_seg_info segs[ICE_FLOW_SEG_MAX];
+
+	/* software VSI handles referenced by this flow profile */
+	DECLARE_BITMAP(vsis, ICE_MAX_VSI);
+};
+
+struct ice_rss_cfg {
+	struct list_head l_entry;
+	/* bitmap of VSIs added to the RSS entry */
+	DECLARE_BITMAP(vsis, ICE_MAX_VSI);
+	u64 hashed_flds;
+	u32 packet_hdr;
+};
+
+enum ice_status
+ice_flow_add_prof(struct ice_hw *hw, enum ice_block blk, enum ice_flow_dir dir,
+		  u64 prof_id, struct ice_flow_seg_info *segs, u8 segs_cnt,
+		  struct ice_flow_prof **prof);
+enum ice_status
+ice_flow_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 prof_id);
+enum ice_status
+ice_flow_add_entry(struct ice_hw *hw, enum ice_block blk, u64 prof_id,
+		   u64 entry_id, u16 vsi, enum ice_flow_priority prio,
+		   void *data, u64 *entry_h);
+enum ice_status
+ice_flow_rem_entry(struct ice_hw *hw, enum ice_block blk, u64 entry_h);
+void
+ice_flow_set_fld(struct ice_flow_seg_info *seg, enum ice_flow_field fld,
+		 u16 val_loc, u16 mask_loc, u16 last_loc, bool range);
+void
+ice_flow_add_fld_raw(struct ice_flow_seg_info *seg, u16 off, u8 len,
+		     u16 val_loc, u16 mask_loc);
+void ice_rem_vsi_rss_list(struct ice_hw *hw, u16 vsi_handle);
+enum ice_status ice_replay_rss_cfg(struct ice_hw *hw, u16 vsi_handle);
+enum ice_status
+ice_add_avf_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds);
+enum ice_status ice_rem_vsi_rss_cfg(struct ice_hw *hw, u16 vsi_handle);
+enum ice_status
+ice_add_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+		u32 addl_hdrs);
+u64 ice_get_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u32 hdrs);
+#endif /* _ICE_FLOW_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.c b/drivers/net/ethernet/intel/ice/ice_fltr.c
new file mode 100644
index 000000000..e27b4de7e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fltr.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_fltr.h"
+
+/**
+ * ice_fltr_free_list - free filter lists helper
+ * @dev: pointer to the device struct
+ * @h: pointer to the list head to be freed
+ *
+ * Helper function to free filter lists previously created using
+ * ice_fltr_add_mac_to_list
+ */
+void ice_fltr_free_list(struct device *dev, struct list_head *h)
+{
+	struct ice_fltr_list_entry *e, *tmp;
+
+	list_for_each_entry_safe(e, tmp, h, list_entry) {
+		list_del(&e->list_entry);
+		devm_kfree(dev, e);
+	}
+}
+
+/**
+ * ice_fltr_add_entry_to_list - allocate and add filter entry to list
+ * @dev: pointer to device needed by alloc function
+ * @info: filter info struct that gets added to the passed in list
+ * @list: pointer to the list which contains MAC filters entry
+ */
+static int
+ice_fltr_add_entry_to_list(struct device *dev, struct ice_fltr_info *info,
+			   struct list_head *list)
+{
+	struct ice_fltr_list_entry *entry;
+
+	entry = devm_kzalloc(dev, sizeof(*entry), GFP_ATOMIC);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->fltr_info = *info;
+
+	INIT_LIST_HEAD(&entry->list_entry);
+	list_add(&entry->list_entry, list);
+
+	return 0;
+}
+
+/**
+ * ice_fltr_add_mac_list - add list of MAC filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+enum ice_status
+ice_fltr_add_mac_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_add_mac(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_remove_mac_list - remove list of MAC filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+enum ice_status
+ice_fltr_remove_mac_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_remove_mac(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_add_vlan_list - add list of VLAN filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+static enum ice_status
+ice_fltr_add_vlan_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_add_vlan(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_remove_vlan_list - remove list of VLAN filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+static enum ice_status
+ice_fltr_remove_vlan_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_remove_vlan(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_add_eth_list - add list of ethertype filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+static enum ice_status
+ice_fltr_add_eth_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_add_eth_mac(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_remove_eth_list - remove list of ethertype filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+static enum ice_status
+ice_fltr_remove_eth_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_remove_eth_mac(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_remove_all - remove all filters associated with VSI
+ * @vsi: pointer to VSI struct
+ */
+void ice_fltr_remove_all(struct ice_vsi *vsi)
+{
+	ice_remove_vsi_fltr(&vsi->back->hw, vsi->idx);
+}
+
+/**
+ * ice_fltr_add_mac_to_list - add MAC filter info to exsisting list
+ * @vsi: pointer to VSI struct
+ * @list: list to add filter info to
+ * @mac: MAC address to add
+ * @action: filter action
+ */
+enum ice_status
+ice_fltr_add_mac_to_list(struct ice_vsi *vsi, struct list_head *list,
+			 const u8 *mac, enum ice_sw_fwd_act_type action)
+{
+	struct ice_fltr_info info = { 0 };
+
+	info.flag = ICE_FLTR_TX;
+	info.src_id = ICE_SRC_ID_VSI;
+	info.lkup_type = ICE_SW_LKUP_MAC;
+	info.fltr_act = action;
+	info.vsi_handle = vsi->idx;
+
+	ether_addr_copy(info.l_data.mac.mac_addr, mac);
+
+	return ice_fltr_add_entry_to_list(ice_pf_to_dev(vsi->back), &info,
+					  list);
+}
+
+/**
+ * ice_fltr_add_vlan_to_list - add VLAN filter info to exsisting list
+ * @vsi: pointer to VSI struct
+ * @list: list to add filter info to
+ * @vlan_id: VLAN ID to add
+ * @action: filter action
+ */
+static int
+ice_fltr_add_vlan_to_list(struct ice_vsi *vsi, struct list_head *list,
+			  u16 vlan_id, enum ice_sw_fwd_act_type action)
+{
+	struct ice_fltr_info info = { 0 };
+
+	info.flag = ICE_FLTR_TX;
+	info.src_id = ICE_SRC_ID_VSI;
+	info.lkup_type = ICE_SW_LKUP_VLAN;
+	info.fltr_act = action;
+	info.vsi_handle = vsi->idx;
+	info.l_data.vlan.vlan_id = vlan_id;
+
+	return ice_fltr_add_entry_to_list(ice_pf_to_dev(vsi->back), &info,
+					  list);
+}
+
+/**
+ * ice_fltr_add_eth_to_list - add ethertype filter info to exsisting list
+ * @vsi: pointer to VSI struct
+ * @list: list to add filter info to
+ * @ethertype: ethertype of packet that matches filter
+ * @flag: filter direction, Tx or Rx
+ * @action: filter action
+ */
+static int
+ice_fltr_add_eth_to_list(struct ice_vsi *vsi, struct list_head *list,
+			 u16 ethertype, u16 flag,
+			 enum ice_sw_fwd_act_type action)
+{
+	struct ice_fltr_info info = { 0 };
+
+	info.flag = flag;
+	info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
+	info.fltr_act = action;
+	info.vsi_handle = vsi->idx;
+	info.l_data.ethertype_mac.ethertype = ethertype;
+
+	if (flag == ICE_FLTR_TX)
+		info.src_id = ICE_SRC_ID_VSI;
+	else
+		info.src_id = ICE_SRC_ID_LPORT;
+
+	return ice_fltr_add_entry_to_list(ice_pf_to_dev(vsi->back), &info,
+					  list);
+}
+
+/**
+ * ice_fltr_prepare_mac - add or remove MAC rule
+ * @vsi: pointer to VSI struct
+ * @mac: MAC address to add
+ * @action: action to be performed on filter match
+ * @mac_action: pointer to add or remove MAC function
+ */
+static enum ice_status
+ice_fltr_prepare_mac(struct ice_vsi *vsi, const u8 *mac,
+		     enum ice_sw_fwd_act_type action,
+		     enum ice_status (*mac_action)(struct ice_vsi *,
+						   struct list_head *))
+{
+	enum ice_status result;
+	LIST_HEAD(tmp_list);
+
+	if (ice_fltr_add_mac_to_list(vsi, &tmp_list, mac, action)) {
+		ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+		return ICE_ERR_NO_MEMORY;
+	}
+
+	result = mac_action(vsi, &tmp_list);
+	ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+	return result;
+}
+
+/**
+ * ice_fltr_prepare_mac_and_broadcast - add or remove MAC and broadcast filter
+ * @vsi: pointer to VSI struct
+ * @mac: MAC address to add
+ * @action: action to be performed on filter match
+ * @mac_action: pointer to add or remove MAC function
+ */
+static enum ice_status
+ice_fltr_prepare_mac_and_broadcast(struct ice_vsi *vsi, const u8 *mac,
+				   enum ice_sw_fwd_act_type action,
+				   enum ice_status(*mac_action)
+				   (struct ice_vsi *, struct list_head *))
+{
+	u8 broadcast[ETH_ALEN];
+	enum ice_status result;
+	LIST_HEAD(tmp_list);
+
+	eth_broadcast_addr(broadcast);
+	if (ice_fltr_add_mac_to_list(vsi, &tmp_list, mac, action) ||
+	    ice_fltr_add_mac_to_list(vsi, &tmp_list, broadcast, action)) {
+		ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+		return ICE_ERR_NO_MEMORY;
+	}
+
+	result = mac_action(vsi, &tmp_list);
+	ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+	return result;
+}
+
+/**
+ * ice_fltr_prepare_vlan - add or remove VLAN filter
+ * @vsi: pointer to VSI struct
+ * @vlan_id: VLAN ID to add
+ * @action: action to be performed on filter match
+ * @vlan_action: pointer to add or remove VLAN function
+ */
+static enum ice_status
+ice_fltr_prepare_vlan(struct ice_vsi *vsi, u16 vlan_id,
+		      enum ice_sw_fwd_act_type action,
+		      enum ice_status (*vlan_action)(struct ice_vsi *,
+						     struct list_head *))
+{
+	enum ice_status result;
+	LIST_HEAD(tmp_list);
+
+	if (ice_fltr_add_vlan_to_list(vsi, &tmp_list, vlan_id, action))
+		return ICE_ERR_NO_MEMORY;
+
+	result = vlan_action(vsi, &tmp_list);
+	ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+	return result;
+}
+
+/**
+ * ice_fltr_prepare_eth - add or remove ethertype filter
+ * @vsi: pointer to VSI struct
+ * @ethertype: ethertype of packet to be filtered
+ * @flag: direction of packet, Tx or Rx
+ * @action: action to be performed on filter match
+ * @eth_action: pointer to add or remove ethertype function
+ */
+static enum ice_status
+ice_fltr_prepare_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+		     enum ice_sw_fwd_act_type action,
+		     enum ice_status (*eth_action)(struct ice_vsi *,
+						   struct list_head *))
+{
+	enum ice_status result;
+	LIST_HEAD(tmp_list);
+
+	if (ice_fltr_add_eth_to_list(vsi, &tmp_list, ethertype, flag, action))
+		return ICE_ERR_NO_MEMORY;
+
+	result = eth_action(vsi, &tmp_list);
+	ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+	return result;
+}
+
+/**
+ * ice_fltr_add_mac - add single MAC filter
+ * @vsi: pointer to VSI struct
+ * @mac: MAC to add
+ * @action: action to be performed on filter match
+ */
+enum ice_status ice_fltr_add_mac(struct ice_vsi *vsi, const u8 *mac,
+				 enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_mac(vsi, mac, action, ice_fltr_add_mac_list);
+}
+
+/**
+ * ice_fltr_add_mac_and_broadcast - add single MAC and broadcast
+ * @vsi: pointer to VSI struct
+ * @mac: MAC to add
+ * @action: action to be performed on filter match
+ */
+enum ice_status
+ice_fltr_add_mac_and_broadcast(struct ice_vsi *vsi, const u8 *mac,
+			       enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_mac_and_broadcast(vsi, mac, action,
+						  ice_fltr_add_mac_list);
+}
+
+/**
+ * ice_fltr_remove_mac - remove MAC filter
+ * @vsi: pointer to VSI struct
+ * @mac: filter MAC to remove
+ * @action: action to remove
+ */
+enum ice_status ice_fltr_remove_mac(struct ice_vsi *vsi, const u8 *mac,
+				    enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_mac(vsi, mac, action, ice_fltr_remove_mac_list);
+}
+
+/**
+ * ice_fltr_add_vlan - add single VLAN filter
+ * @vsi: pointer to VSI struct
+ * @vlan_id: VLAN ID to add
+ * @action: action to be performed on filter match
+ */
+enum ice_status ice_fltr_add_vlan(struct ice_vsi *vsi, u16 vlan_id,
+				  enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_vlan(vsi, vlan_id, action,
+				     ice_fltr_add_vlan_list);
+}
+
+/**
+ * ice_fltr_remove_vlan - remove VLAN filter
+ * @vsi: pointer to VSI struct
+ * @vlan_id: filter VLAN to remove
+ * @action: action to remove
+ */
+enum ice_status ice_fltr_remove_vlan(struct ice_vsi *vsi, u16 vlan_id,
+				     enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_vlan(vsi, vlan_id, action,
+				     ice_fltr_remove_vlan_list);
+}
+
+/**
+ * ice_fltr_add_eth - add specyfic ethertype filter
+ * @vsi: pointer to VSI struct
+ * @ethertype: ethertype of filter
+ * @flag: direction of packet to be filtered, Tx or Rx
+ * @action: action to be performed on filter match
+ */
+enum ice_status ice_fltr_add_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+				 enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_eth(vsi, ethertype, flag, action,
+				    ice_fltr_add_eth_list);
+}
+
+/**
+ * ice_fltr_remove_eth - remove ethertype filter
+ * @vsi: pointer to VSI struct
+ * @ethertype: ethertype of filter
+ * @flag: direction of filter
+ * @action: action to remove
+ */
+enum ice_status ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype,
+				    u16 flag, enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_eth(vsi, ethertype, flag, action,
+				    ice_fltr_remove_eth_list);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.h b/drivers/net/ethernet/intel/ice/ice_fltr.h
new file mode 100644
index 000000000..361cb4da9
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fltr.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#ifndef _ICE_FLTR_H_
+#define _ICE_FLTR_H_
+
+void ice_fltr_free_list(struct device *dev, struct list_head *h);
+enum ice_status
+ice_fltr_add_mac_to_list(struct ice_vsi *vsi, struct list_head *list,
+			 const u8 *mac, enum ice_sw_fwd_act_type action);
+enum ice_status
+ice_fltr_add_mac(struct ice_vsi *vsi, const u8 *mac,
+		 enum ice_sw_fwd_act_type action);
+enum ice_status
+ice_fltr_add_mac_and_broadcast(struct ice_vsi *vsi, const u8 *mac,
+			       enum ice_sw_fwd_act_type action);
+enum ice_status
+ice_fltr_add_mac_list(struct ice_vsi *vsi, struct list_head *list);
+enum ice_status
+ice_fltr_remove_mac(struct ice_vsi *vsi, const u8 *mac,
+		    enum ice_sw_fwd_act_type action);
+enum ice_status
+ice_fltr_remove_mac_list(struct ice_vsi *vsi, struct list_head *list);
+
+enum ice_status
+ice_fltr_add_vlan(struct ice_vsi *vsi, u16 vid,
+		  enum ice_sw_fwd_act_type action);
+enum ice_status
+ice_fltr_remove_vlan(struct ice_vsi *vsi, u16 vid,
+		     enum ice_sw_fwd_act_type action);
+
+enum ice_status
+ice_fltr_add_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+		 enum ice_sw_fwd_act_type action);
+enum ice_status
+ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+		    enum ice_sw_fwd_act_type action);
+void ice_fltr_remove_all(struct ice_vsi *vsi);
+#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c
new file mode 100644
index 000000000..8f81b95e6
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c
@@ -0,0 +1,802 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2019, Intel Corporation. */
+
+#include <asm/unaligned.h>
+#include <linux/uuid.h>
+#include <linux/crc32.h>
+#include <linux/pldmfw.h>
+#include "ice.h"
+#include "ice_fw_update.h"
+
+struct ice_fwu_priv {
+	struct pldmfw context;
+
+	struct ice_pf *pf;
+	struct netlink_ext_ack *extack;
+
+	/* Track which NVM banks to activate at the end of the update */
+	u8 activate_flags;
+};
+
+/**
+ * ice_send_package_data - Send record package data to firmware
+ * @context: PLDM fw update structure
+ * @data: pointer to the package data
+ * @length: length of the package data
+ *
+ * Send a copy of the package data associated with the PLDM record matching
+ * this device to the firmware.
+ *
+ * Note that this function sends an AdminQ command that will fail unless the
+ * NVM resource has been acquired.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_send_package_data(struct pldmfw *context, const u8 *data, u16 length)
+{
+	struct ice_fwu_priv *priv = container_of(context, struct ice_fwu_priv, context);
+	struct netlink_ext_ack *extack = priv->extack;
+	struct device *dev = context->dev;
+	struct ice_pf *pf = priv->pf;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	u8 *package_data;
+
+	dev_dbg(dev, "Sending PLDM record package data to firmware\n");
+
+	package_data = kmemdup(data, length, GFP_KERNEL);
+	if (!package_data)
+		return -ENOMEM;
+
+	status = ice_nvm_set_pkg_data(hw, false, package_data, length, NULL);
+
+	kfree(package_data);
+
+	if (status) {
+		dev_err(dev, "Failed to send record package data to firmware, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		NL_SET_ERR_MSG_MOD(extack, "Failed to record package data to firmware");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_check_component_response - Report firmware response to a component
+ * @pf: device private data structure
+ * @id: component id being checked
+ * @response: indicates whether this component can be updated
+ * @code: code indicating reason for response
+ * @extack: netlink extended ACK structure
+ *
+ * Check whether firmware indicates if this component can be updated. Report
+ * a suitable error message over the netlink extended ACK if the component
+ * cannot be updated.
+ *
+ * Returns: zero if the component can be updated, or -ECANCELED of the
+ * firmware indicates the component cannot be updated.
+ */
+static int
+ice_check_component_response(struct ice_pf *pf, u16 id, u8 response, u8 code,
+			     struct netlink_ext_ack *extack)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	const char *component;
+
+	switch (id) {
+	case NVM_COMP_ID_OROM:
+		component = "fw.undi";
+		break;
+	case NVM_COMP_ID_NVM:
+		component = "fw.mgmt";
+		break;
+	case NVM_COMP_ID_NETLIST:
+		component = "fw.netlist";
+		break;
+	default:
+		WARN(1, "Unexpected unknown component identifier 0x%02x", id);
+		return -EINVAL;
+	}
+
+	dev_dbg(dev, "%s: firmware response 0x%x, code 0x%x\n",
+		component, response, code);
+
+	switch (response) {
+	case ICE_AQ_NVM_PASS_COMP_CAN_BE_UPDATED:
+		/* firmware indicated this update is good to proceed */
+		return 0;
+	case ICE_AQ_NVM_PASS_COMP_CAN_MAY_BE_UPDATEABLE:
+		dev_warn(dev, "firmware recommends not updating %s, as it may result in a downgrade. continuing anyways\n", component);
+		return 0;
+	case ICE_AQ_NVM_PASS_COMP_CAN_NOT_BE_UPDATED:
+		dev_info(dev, "firmware has rejected updating %s\n", component);
+		break;
+	}
+
+	switch (code) {
+	case ICE_AQ_NVM_PASS_COMP_STAMP_IDENTICAL_CODE:
+		dev_err(dev, "Component comparison stamp for %s is identical to the running image\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component comparison stamp is identical to running image");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_STAMP_LOWER:
+		dev_err(dev, "Component comparison stamp for %s is lower than the running image\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component comparison stamp is lower than running image");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_INVALID_STAMP_CODE:
+		dev_err(dev, "Component comparison stamp for %s is invalid\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component comparison stamp is invalid");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_CONFLICT_CODE:
+		dev_err(dev, "%s conflicts with a previous component table\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component table conflict occurred");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_PRE_REQ_NOT_MET_CODE:
+		dev_err(dev, "Pre-requisites for component %s have not been met\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component pre-requisites not met");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_NOT_SUPPORTED_CODE:
+		dev_err(dev, "%s is not a supported component\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component not supported");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_CANNOT_DOWNGRADE_CODE:
+		dev_err(dev, "Security restrictions prevent %s from being downgraded\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component cannot be downgraded");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_INCOMPLETE_IMAGE_CODE:
+		dev_err(dev, "Received an incomplete component image for %s\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Incomplete component image");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_VER_STR_IDENTICAL_CODE:
+		dev_err(dev, "Component version for %s is identical to the running image\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component version is identical to running image");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_VER_STR_LOWER_CODE:
+		dev_err(dev, "Component version for %s is lower than the running image\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component version is lower than the running image");
+		break;
+	default:
+		dev_err(dev, "Unexpected response code 0x02%x for %s\n",
+			code, component);
+		NL_SET_ERR_MSG_MOD(extack, "Received unexpected response code from firmware");
+		break;
+	}
+
+	return -ECANCELED;
+}
+
+/**
+ * ice_send_component_table - Send PLDM component table to firmware
+ * @context: PLDM fw update structure
+ * @component: the component to process
+ * @transfer_flag: relative transfer order of this component
+ *
+ * Read relevant data from the component and forward it to the device
+ * firmware. Check the response to determine if the firmware indicates that
+ * the update can proceed.
+ *
+ * This function sends AdminQ commands related to the NVM, and assumes that
+ * the NVM resource has been acquired.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_send_component_table(struct pldmfw *context, struct pldmfw_component *component,
+			 u8 transfer_flag)
+{
+	struct ice_fwu_priv *priv = container_of(context, struct ice_fwu_priv, context);
+	struct netlink_ext_ack *extack = priv->extack;
+	struct ice_aqc_nvm_comp_tbl *comp_tbl;
+	u8 comp_response, comp_response_code;
+	struct device *dev = context->dev;
+	struct ice_pf *pf = priv->pf;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	size_t length;
+
+	switch (component->identifier) {
+	case NVM_COMP_ID_OROM:
+	case NVM_COMP_ID_NVM:
+	case NVM_COMP_ID_NETLIST:
+		break;
+	default:
+		dev_err(dev, "Unable to update due to a firmware component with unknown ID %u\n",
+			component->identifier);
+		NL_SET_ERR_MSG_MOD(extack, "Unable to update due to unknown firmware component");
+		return -EOPNOTSUPP;
+	}
+
+	length = struct_size(comp_tbl, cvs, component->version_len);
+	comp_tbl = kzalloc(length, GFP_KERNEL);
+	if (!comp_tbl)
+		return -ENOMEM;
+
+	comp_tbl->comp_class = cpu_to_le16(component->classification);
+	comp_tbl->comp_id = cpu_to_le16(component->identifier);
+	comp_tbl->comp_class_idx = FWU_COMP_CLASS_IDX_NOT_USE;
+	comp_tbl->comp_cmp_stamp = cpu_to_le32(component->comparison_stamp);
+	comp_tbl->cvs_type = component->version_type;
+	comp_tbl->cvs_len = component->version_len;
+	memcpy(comp_tbl->cvs, component->version_string, component->version_len);
+
+	dev_dbg(dev, "Sending component table to firmware:\n");
+
+	status = ice_nvm_pass_component_tbl(hw, (u8 *)comp_tbl, length,
+					    transfer_flag, &comp_response,
+					    &comp_response_code, NULL);
+
+	kfree(comp_tbl);
+
+	if (status) {
+		dev_err(dev, "Failed to transfer component table to firmware, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		NL_SET_ERR_MSG_MOD(extack, "Failed to transfer component table to firmware");
+		return -EIO;
+	}
+
+	return ice_check_component_response(pf, component->identifier, comp_response,
+					    comp_response_code, extack);
+}
+
+/**
+ * ice_write_one_nvm_block - Write an NVM block and await completion response
+ * @pf: the PF data structure
+ * @module: the module to write to
+ * @offset: offset in bytes
+ * @block_size: size of the block to write, up to 4k
+ * @block: pointer to block of data to write
+ * @last_cmd: whether this is the last command
+ * @extack: netlink extended ACK structure
+ *
+ * Write a block of data to a flash module, and await for the completion
+ * response message from firmware.
+ *
+ * Note this function assumes the caller has acquired the NVM resource.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_write_one_nvm_block(struct ice_pf *pf, u16 module, u32 offset,
+			u16 block_size, u8 *block, bool last_cmd,
+			struct netlink_ext_ack *extack)
+{
+	u16 completion_module, completion_retval;
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_rq_event_info event;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	u32 completion_offset;
+	int err;
+
+	memset(&event, 0, sizeof(event));
+
+	dev_dbg(dev, "Writing block of %u bytes for module 0x%02x at offset %u\n",
+		block_size, module, offset);
+
+	status = ice_aq_update_nvm(hw, module, offset, block_size, block,
+				   last_cmd, 0, NULL);
+	if (status) {
+		dev_err(dev, "Failed to flash module 0x%02x with block of size %u at offset %u, err %s aq_err %s\n",
+			module, block_size, offset, ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		NL_SET_ERR_MSG_MOD(extack, "Failed to program flash module");
+		return -EIO;
+	}
+
+	/* In most cases, firmware reports a write completion within a few
+	 * milliseconds. However, it has been observed that a completion might
+	 * take more than a second to complete in some cases. The timeout here
+	 * is conservative and is intended to prevent failure to update when
+	 * firmware is slow to respond.
+	 */
+	err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write, 15 * HZ, &event);
+	if (err) {
+		dev_err(dev, "Timed out while trying to flash module 0x%02x with block of size %u at offset %u, err %d\n",
+			module, block_size, offset, err);
+		NL_SET_ERR_MSG_MOD(extack, "Timed out waiting for firmware");
+		return -EIO;
+	}
+
+	completion_module = le16_to_cpu(event.desc.params.nvm.module_typeid);
+	completion_retval = le16_to_cpu(event.desc.retval);
+
+	completion_offset = le16_to_cpu(event.desc.params.nvm.offset_low);
+	completion_offset |= event.desc.params.nvm.offset_high << 16;
+
+	if (completion_module != module) {
+		dev_err(dev, "Unexpected module_typeid in write completion: got 0x%x, expected 0x%x\n",
+			completion_module, module);
+		NL_SET_ERR_MSG_MOD(extack, "Unexpected firmware response");
+		return -EIO;
+	}
+
+	if (completion_offset != offset) {
+		dev_err(dev, "Unexpected offset in write completion: got %u, expected %u\n",
+			completion_offset, offset);
+		NL_SET_ERR_MSG_MOD(extack, "Unexpected firmware response");
+		return -EIO;
+	}
+
+	if (completion_retval) {
+		dev_err(dev, "Firmware failed to flash module 0x%02x with block of size %u at offset %u, err %s\n",
+			module, block_size, offset,
+			ice_aq_str((enum ice_aq_err)completion_retval));
+		NL_SET_ERR_MSG_MOD(extack, "Firmware failed to program flash module");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_write_nvm_module - Write data to an NVM module
+ * @pf: the PF driver structure
+ * @module: the module id to program
+ * @component: the name of the component being updated
+ * @image: buffer of image data to write to the NVM
+ * @length: length of the buffer
+ * @extack: netlink extended ACK structure
+ *
+ * Loop over the data for a given NVM module and program it in 4 Kb
+ * blocks. Notify devlink core of progress after each block is programmed.
+ * Loops over a block of data and programs the NVM in 4k block chunks.
+ *
+ * Note this function assumes the caller has acquired the NVM resource.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_write_nvm_module(struct ice_pf *pf, u16 module, const char *component,
+		     const u8 *image, u32 length,
+		     struct netlink_ext_ack *extack)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct devlink *devlink;
+	u32 offset = 0;
+	bool last_cmd;
+	u8 *block;
+	int err;
+
+	dev_dbg(dev, "Beginning write of flash component '%s', module 0x%02x\n", component, module);
+
+	devlink = priv_to_devlink(pf);
+
+	devlink_flash_update_status_notify(devlink, "Flashing",
+					   component, 0, length);
+
+	block = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
+	if (!block)
+		return -ENOMEM;
+
+	do {
+		u32 block_size;
+
+		block_size = min_t(u32, ICE_AQ_MAX_BUF_LEN, length - offset);
+		last_cmd = !(offset + block_size < length);
+
+		/* ice_aq_update_nvm may copy the firmware response into the
+		 * buffer, so we must make a copy since the source data is
+		 * constant.
+		 */
+		memcpy(block, image + offset, block_size);
+
+		err = ice_write_one_nvm_block(pf, module, offset, block_size,
+					      block, last_cmd, extack);
+		if (err)
+			break;
+
+		offset += block_size;
+
+		devlink_flash_update_status_notify(devlink, "Flashing",
+						   component, offset, length);
+	} while (!last_cmd);
+
+	dev_dbg(dev, "Completed write of flash component '%s', module 0x%02x\n", component, module);
+
+	if (err)
+		devlink_flash_update_status_notify(devlink, "Flashing failed",
+						   component, length, length);
+	else
+		devlink_flash_update_status_notify(devlink, "Flashing done",
+						   component, length, length);
+
+	kfree(block);
+	return err;
+}
+
+/**
+ * ice_erase_nvm_module - Erase an NVM module and await firmware completion
+ * @pf: the PF data structure
+ * @module: the module to erase
+ * @component: name of the component being updated
+ * @extack: netlink extended ACK structure
+ *
+ * Erase the inactive NVM bank associated with this module, and await for
+ * a completion response message from firmware.
+ *
+ * Note this function assumes the caller has acquired the NVM resource.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_erase_nvm_module(struct ice_pf *pf, u16 module, const char *component,
+		     struct netlink_ext_ack *extack)
+{
+	u16 completion_module, completion_retval;
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_rq_event_info event;
+	struct ice_hw *hw = &pf->hw;
+	struct devlink *devlink;
+	enum ice_status status;
+	int err;
+
+	dev_dbg(dev, "Beginning erase of flash component '%s', module 0x%02x\n", component, module);
+
+	memset(&event, 0, sizeof(event));
+
+	devlink = priv_to_devlink(pf);
+
+	devlink_flash_update_status_notify(devlink, "Erasing", component, 0, 0);
+
+	status = ice_aq_erase_nvm(hw, module, NULL);
+	if (status) {
+		dev_err(dev, "Failed to erase %s (module 0x%02x), err %s aq_err %s\n",
+			component, module, ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		NL_SET_ERR_MSG_MOD(extack, "Failed to erase flash module");
+		err = -EIO;
+		goto out_notify_devlink;
+	}
+
+	/* Yes, this really can take minutes to complete */
+	err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_erase, 300 * HZ, &event);
+	if (err) {
+		dev_err(dev, "Timed out waiting for firmware to respond with erase completion for %s (module 0x%02x), err %d\n",
+			component, module, err);
+		NL_SET_ERR_MSG_MOD(extack, "Timed out waiting for firmware");
+		goto out_notify_devlink;
+	}
+
+	completion_module = le16_to_cpu(event.desc.params.nvm.module_typeid);
+	completion_retval = le16_to_cpu(event.desc.retval);
+
+	if (completion_module != module) {
+		dev_err(dev, "Unexpected module_typeid in erase completion for %s: got 0x%x, expected 0x%x\n",
+			component, completion_module, module);
+		NL_SET_ERR_MSG_MOD(extack, "Unexpected firmware response");
+		err = -EIO;
+		goto out_notify_devlink;
+	}
+
+	if (completion_retval) {
+		dev_err(dev, "Firmware failed to erase %s (module 0x02%x), aq_err %s\n",
+			component, module,
+			ice_aq_str((enum ice_aq_err)completion_retval));
+		NL_SET_ERR_MSG_MOD(extack, "Firmware failed to erase flash");
+		err = -EIO;
+		goto out_notify_devlink;
+	}
+
+	dev_dbg(dev, "Completed erase of flash component '%s', module 0x%02x\n", component, module);
+
+out_notify_devlink:
+	if (err)
+		devlink_flash_update_status_notify(devlink, "Erasing failed",
+						   component, 0, 0);
+	else
+		devlink_flash_update_status_notify(devlink, "Erasing done",
+						   component, 0, 0);
+
+	return err;
+}
+
+/**
+ * ice_switch_flash_banks - Tell firmware to switch NVM banks
+ * @pf: Pointer to the PF data structure
+ * @activate_flags: flags used for the activation command
+ * @extack: netlink extended ACK structure
+ *
+ * Notify firmware to activate the newly written flash banks, and wait for the
+ * firmware response.
+ *
+ * Returns: zero on success or an error code on failure.
+ */
+static int ice_switch_flash_banks(struct ice_pf *pf, u8 activate_flags,
+				  struct netlink_ext_ack *extack)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_rq_event_info event;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	u16 completion_retval;
+	int err;
+
+	memset(&event, 0, sizeof(event));
+
+	status = ice_nvm_write_activate(hw, activate_flags);
+	if (status) {
+		dev_err(dev, "Failed to switch active flash banks, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		NL_SET_ERR_MSG_MOD(extack, "Failed to switch active flash banks");
+		return -EIO;
+	}
+
+	err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write_activate, 30 * HZ,
+				    &event);
+	if (err) {
+		dev_err(dev, "Timed out waiting for firmware to switch active flash banks, err %d\n",
+			err);
+		NL_SET_ERR_MSG_MOD(extack, "Timed out waiting for firmware");
+		return err;
+	}
+
+	completion_retval = le16_to_cpu(event.desc.retval);
+	if (completion_retval) {
+		dev_err(dev, "Firmware failed to switch active flash banks aq_err %s\n",
+			ice_aq_str((enum ice_aq_err)completion_retval));
+		NL_SET_ERR_MSG_MOD(extack, "Firmware failed to switch active flash banks");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_flash_component - Flash a component of the NVM
+ * @context: PLDM fw update structure
+ * @component: the component table to program
+ *
+ * Program the flash contents for a given component. First, determine the
+ * module id. Then, erase the secondary bank for this module. Finally, write
+ * the contents of the component to the NVM.
+ *
+ * Note this function assumes the caller has acquired the NVM resource.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_flash_component(struct pldmfw *context, struct pldmfw_component *component)
+{
+	struct ice_fwu_priv *priv = container_of(context, struct ice_fwu_priv, context);
+	struct netlink_ext_ack *extack = priv->extack;
+	struct ice_pf *pf = priv->pf;
+	const char *name;
+	u16 module;
+	u8 flag;
+	int err;
+
+	switch (component->identifier) {
+	case NVM_COMP_ID_OROM:
+		module = ICE_SR_1ST_OROM_BANK_PTR;
+		flag = ICE_AQC_NVM_ACTIV_SEL_OROM;
+		name = "fw.undi";
+		break;
+	case NVM_COMP_ID_NVM:
+		module = ICE_SR_1ST_NVM_BANK_PTR;
+		flag = ICE_AQC_NVM_ACTIV_SEL_NVM;
+		name = "fw.mgmt";
+		break;
+	case NVM_COMP_ID_NETLIST:
+		module = ICE_SR_NETLIST_BANK_PTR;
+		flag = ICE_AQC_NVM_ACTIV_SEL_NETLIST;
+		name = "fw.netlist";
+		break;
+	default:
+		/* This should not trigger, since we check the id before
+		 * sending the component table to firmware.
+		 */
+		WARN(1, "Unexpected unknown component identifier 0x%02x",
+		     component->identifier);
+		return -EINVAL;
+	}
+
+	/* Mark this component for activating at the end */
+	priv->activate_flags |= flag;
+
+	err = ice_erase_nvm_module(pf, module, name, extack);
+	if (err)
+		return err;
+
+	return ice_write_nvm_module(pf, module, name, component->component_data,
+				    component->component_size, extack);
+}
+
+/**
+ * ice_finalize_update - Perform last steps to complete device update
+ * @context: PLDM fw update structure
+ *
+ * Called as the last step of the update process. Complete the update by
+ * telling the firmware to switch active banks, and perform a reset of
+ * configured.
+ *
+ * Returns: 0 on success, or an error code on failure.
+ */
+static int ice_finalize_update(struct pldmfw *context)
+{
+	struct ice_fwu_priv *priv = container_of(context, struct ice_fwu_priv, context);
+	struct netlink_ext_ack *extack = priv->extack;
+	struct ice_pf *pf = priv->pf;
+
+	/* Finally, notify firmware to activate the written NVM banks */
+	return ice_switch_flash_banks(pf, priv->activate_flags, extack);
+}
+
+static const struct pldmfw_ops ice_fwu_ops = {
+	.match_record = &pldmfw_op_pci_match_record,
+	.send_package_data = &ice_send_package_data,
+	.send_component_table = &ice_send_component_table,
+	.flash_component = &ice_flash_component,
+	.finalize_update = &ice_finalize_update,
+};
+
+/**
+ * ice_flash_pldm_image - Write a PLDM-formatted firmware image to the device
+ * @pf: private device driver structure
+ * @fw: firmware object pointing to the relevant firmware file
+ * @preservation: preservation level to request from firmware
+ * @extack: netlink extended ACK structure
+ *
+ * Parse the data for a given firmware file, verifying that it is a valid PLDM
+ * formatted image that matches this device.
+ *
+ * Extract the device record Package Data and Component Tables and send them
+ * to the firmware. Extract and write the flash data for each of the three
+ * main flash components, "fw.mgmt", "fw.undi", and "fw.netlist". Notify
+ * firmware once the data is written to the inactive banks.
+ *
+ * Returns: zero on success or a negative error code on failure.
+ */
+int ice_flash_pldm_image(struct ice_pf *pf, const struct firmware *fw,
+			 u8 preservation, struct netlink_ext_ack *extack)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	struct ice_fwu_priv priv;
+	enum ice_status status;
+	int err;
+
+	switch (preservation) {
+	case ICE_AQC_NVM_PRESERVE_ALL:
+	case ICE_AQC_NVM_PRESERVE_SELECTED:
+	case ICE_AQC_NVM_NO_PRESERVATION:
+	case ICE_AQC_NVM_FACTORY_DEFAULT:
+		break;
+	default:
+		WARN(1, "Unexpected preservation level request %u", preservation);
+		return -EINVAL;
+	}
+
+	memset(&priv, 0, sizeof(priv));
+
+	priv.context.ops = &ice_fwu_ops;
+	priv.context.dev = dev;
+	priv.extack = extack;
+	priv.pf = pf;
+	priv.activate_flags = preservation;
+
+	status = ice_acquire_nvm(hw, ICE_RES_WRITE);
+	if (status) {
+		dev_err(dev, "Failed to acquire device flash lock, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		NL_SET_ERR_MSG_MOD(extack, "Failed to acquire device flash lock");
+		return -EIO;
+	}
+
+	err = pldmfw_flash_image(&priv.context, fw);
+
+	ice_release_nvm(hw);
+
+	return err;
+}
+
+/**
+ * ice_check_for_pending_update - Check for a pending flash update
+ * @pf: the PF driver structure
+ * @component: if not NULL, the name of the component being updated
+ * @extack: Netlink extended ACK structure
+ *
+ * Check whether the device already has a pending flash update. If such an
+ * update is found, cancel it so that the requested update may proceed.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+int ice_check_for_pending_update(struct ice_pf *pf, const char *component,
+				 struct netlink_ext_ack *extack)
+{
+	struct devlink *devlink = priv_to_devlink(pf);
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw_dev_caps *dev_caps;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	u8 pending = 0;
+	int err;
+
+	dev_caps = kzalloc(sizeof(*dev_caps), GFP_KERNEL);
+	if (!dev_caps)
+		return -ENOMEM;
+
+	/* Read the most recent device capabilities from firmware. Do not use
+	 * the cached values in hw->dev_caps, because the pending update flag
+	 * may have changed, e.g. if an update was previously completed and
+	 * the system has not yet rebooted.
+	 */
+	status = ice_discover_dev_caps(hw, dev_caps);
+	if (status) {
+		NL_SET_ERR_MSG_MOD(extack, "Unable to read device capabilities");
+		kfree(dev_caps);
+		return -EIO;
+	}
+
+	if (dev_caps->common_cap.nvm_update_pending_nvm) {
+		dev_info(dev, "The fw.mgmt flash component has a pending update\n");
+		pending |= ICE_AQC_NVM_ACTIV_SEL_NVM;
+	}
+
+	if (dev_caps->common_cap.nvm_update_pending_orom) {
+		dev_info(dev, "The fw.undi flash component has a pending update\n");
+		pending |= ICE_AQC_NVM_ACTIV_SEL_OROM;
+	}
+
+	if (dev_caps->common_cap.nvm_update_pending_netlist) {
+		dev_info(dev, "The fw.netlist flash component has a pending update\n");
+		pending |= ICE_AQC_NVM_ACTIV_SEL_NETLIST;
+	}
+
+	kfree(dev_caps);
+
+	/* If the flash_update request is for a specific component, ignore all
+	 * of the other components.
+	 */
+	if (component) {
+		if (strcmp(component, "fw.mgmt") == 0)
+			pending &= ICE_AQC_NVM_ACTIV_SEL_NVM;
+		else if (strcmp(component, "fw.undi") == 0)
+			pending &= ICE_AQC_NVM_ACTIV_SEL_OROM;
+		else if (strcmp(component, "fw.netlist") == 0)
+			pending &= ICE_AQC_NVM_ACTIV_SEL_NETLIST;
+		else
+			WARN(1, "Unexpected flash component %s", component);
+	}
+
+	/* There is no previous pending update, so this request may continue */
+	if (!pending)
+		return 0;
+
+	/* In order to allow overwriting a previous pending update, notify
+	 * firmware to cancel that update by issuing the appropriate command.
+	 */
+	devlink_flash_update_status_notify(devlink,
+					   "Canceling previous pending update",
+					   component, 0, 0);
+
+	status = ice_acquire_nvm(hw, ICE_RES_WRITE);
+	if (status) {
+		dev_err(dev, "Failed to acquire device flash lock, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		NL_SET_ERR_MSG_MOD(extack, "Failed to acquire device flash lock");
+		return -EIO;
+	}
+
+	pending |= ICE_AQC_NVM_REVERT_LAST_ACTIV;
+	err = ice_switch_flash_banks(pf, pending, extack);
+
+	ice_release_nvm(hw);
+
+	return err;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.h b/drivers/net/ethernet/intel/ice/ice_fw_update.h
new file mode 100644
index 000000000..c6390f685
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fw_update.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2019, Intel Corporation. */
+
+#ifndef _ICE_FW_UPDATE_H_
+#define _ICE_FW_UPDATE_H_
+
+int ice_flash_pldm_image(struct ice_pf *pf, const struct firmware *fw,
+			 u8 preservation, struct netlink_ext_ack *extack);
+int ice_check_for_pending_update(struct ice_pf *pf, const char *component,
+				 struct netlink_ext_ack *extack);
+
+#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
new file mode 100644
index 000000000..406dd6bd9
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -0,0 +1,384 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* Machine-generated file */
+
+#ifndef _ICE_HW_AUTOGEN_H_
+#define _ICE_HW_AUTOGEN_H_
+
+#define QTX_COMM_DBELL(_DBQM)			(0x002C0000 + ((_DBQM) * 4))
+#define QTX_COMM_HEAD(_DBQM)			(0x000E0000 + ((_DBQM) * 4))
+#define QTX_COMM_HEAD_HEAD_S			0
+#define QTX_COMM_HEAD_HEAD_M			ICE_M(0x1FFF, 0)
+#define PF_FW_ARQBAH				0x00080180
+#define PF_FW_ARQBAL				0x00080080
+#define PF_FW_ARQH				0x00080380
+#define PF_FW_ARQH_ARQH_M			ICE_M(0x3FF, 0)
+#define PF_FW_ARQLEN				0x00080280
+#define PF_FW_ARQLEN_ARQLEN_M			ICE_M(0x3FF, 0)
+#define PF_FW_ARQLEN_ARQVFE_M			BIT(28)
+#define PF_FW_ARQLEN_ARQOVFL_M			BIT(29)
+#define PF_FW_ARQLEN_ARQCRIT_M			BIT(30)
+#define PF_FW_ARQLEN_ARQENABLE_M		BIT(31)
+#define PF_FW_ARQT				0x00080480
+#define PF_FW_ATQBAH				0x00080100
+#define PF_FW_ATQBAL				0x00080000
+#define PF_FW_ATQH				0x00080300
+#define PF_FW_ATQH_ATQH_M			ICE_M(0x3FF, 0)
+#define PF_FW_ATQLEN				0x00080200
+#define PF_FW_ATQLEN_ATQLEN_M			ICE_M(0x3FF, 0)
+#define PF_FW_ATQLEN_ATQVFE_M			BIT(28)
+#define PF_FW_ATQLEN_ATQOVFL_M			BIT(29)
+#define PF_FW_ATQLEN_ATQCRIT_M			BIT(30)
+#define VF_MBX_ARQLEN(_VF)			(0x0022BC00 + ((_VF) * 4))
+#define VF_MBX_ATQLEN(_VF)			(0x0022A800 + ((_VF) * 4))
+#define PF_FW_ATQLEN_ATQENABLE_M		BIT(31)
+#define PF_FW_ATQT				0x00080400
+#define PF_MBX_ARQBAH				0x0022E400
+#define PF_MBX_ARQBAL				0x0022E380
+#define PF_MBX_ARQH				0x0022E500
+#define PF_MBX_ARQH_ARQH_M			ICE_M(0x3FF, 0)
+#define PF_MBX_ARQLEN				0x0022E480
+#define PF_MBX_ARQLEN_ARQLEN_M			ICE_M(0x3FF, 0)
+#define PF_MBX_ARQLEN_ARQCRIT_M			BIT(30)
+#define PF_MBX_ARQLEN_ARQENABLE_M		BIT(31)
+#define PF_MBX_ARQT				0x0022E580
+#define PF_MBX_ATQBAH				0x0022E180
+#define PF_MBX_ATQBAL				0x0022E100
+#define PF_MBX_ATQH				0x0022E280
+#define PF_MBX_ATQH_ATQH_M			ICE_M(0x3FF, 0)
+#define PF_MBX_ATQLEN				0x0022E200
+#define PF_MBX_ATQLEN_ATQLEN_M			ICE_M(0x3FF, 0)
+#define PF_MBX_ATQLEN_ATQCRIT_M			BIT(30)
+#define PF_MBX_ATQLEN_ATQENABLE_M		BIT(31)
+#define PF_MBX_ATQT				0x0022E300
+#define PRTDCB_GENC				0x00083000
+#define PRTDCB_GENC_PFCLDA_S			16
+#define PRTDCB_GENC_PFCLDA_M			ICE_M(0xFFFF, 16)
+#define PRTDCB_GENS				0x00083020
+#define PRTDCB_GENS_DCBX_STATUS_S		0
+#define PRTDCB_GENS_DCBX_STATUS_M		ICE_M(0x7, 0)
+#define PRTDCB_TUP2TC				0x001D26C0
+#define GL_PREEXT_L2_PMASK0(_i)			(0x0020F0FC + ((_i) * 4))
+#define GL_PREEXT_L2_PMASK1(_i)			(0x0020F108 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_0(_i)		(0x0045c800 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S	0
+#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_M	ICE_M(0xFF, 0)
+#define GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_S	30
+#define GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_M	ICE_M(0x3, 30)
+#define GLFLXP_RXDID_FLX_WRD_1(_i)		(0x0045c900 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_S	0
+#define GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_M	ICE_M(0xFF, 0)
+#define GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_S	30
+#define GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_M	ICE_M(0x3, 30)
+#define GLFLXP_RXDID_FLX_WRD_2(_i)		(0x0045ca00 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_S	0
+#define GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_M	ICE_M(0xFF, 0)
+#define GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_S	30
+#define GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_M	ICE_M(0x3, 30)
+#define GLFLXP_RXDID_FLX_WRD_3(_i)		(0x0045cb00 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_S	0
+#define GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_M	ICE_M(0xFF, 0)
+#define GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_S	30
+#define GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_M	ICE_M(0x3, 30)
+#define QRXFLXP_CNTXT(_QRX)			(0x00480000 + ((_QRX) * 4))
+#define QRXFLXP_CNTXT_RXDID_IDX_S		0
+#define QRXFLXP_CNTXT_RXDID_IDX_M		ICE_M(0x3F, 0)
+#define QRXFLXP_CNTXT_RXDID_PRIO_S		8
+#define QRXFLXP_CNTXT_RXDID_PRIO_M		ICE_M(0x7, 8)
+#define QRXFLXP_CNTXT_TS_M			BIT(11)
+#define GLGEN_RSTAT				0x000B8188
+#define GLGEN_RSTAT_DEVSTATE_M			ICE_M(0x3, 0)
+#define GLGEN_RSTCTL				0x000B8180
+#define GLGEN_RSTCTL_GRSTDEL_S			0
+#define GLGEN_RSTCTL_GRSTDEL_M			ICE_M(0x3F, GLGEN_RSTCTL_GRSTDEL_S)
+#define GLGEN_RSTAT_RESET_TYPE_S		2
+#define GLGEN_RSTAT_RESET_TYPE_M		ICE_M(0x3, 2)
+#define GLGEN_RTRIG				0x000B8190
+#define GLGEN_RTRIG_CORER_M			BIT(0)
+#define GLGEN_RTRIG_GLOBR_M			BIT(1)
+#define GLGEN_STAT				0x000B612C
+#define GLGEN_VFLRSTAT(_i)			(0x00093A04 + ((_i) * 4))
+#define PFGEN_CTRL				0x00091000
+#define PFGEN_CTRL_PFSWR_M			BIT(0)
+#define PFGEN_STATE				0x00088000
+#define PRTGEN_STATUS				0x000B8100
+#define VFGEN_RSTAT(_VF)			(0x00074000 + ((_VF) * 4))
+#define VPGEN_VFRSTAT(_VF)			(0x00090800 + ((_VF) * 4))
+#define VPGEN_VFRSTAT_VFRD_M			BIT(0)
+#define VPGEN_VFRTRIG(_VF)			(0x00090000 + ((_VF) * 4))
+#define VPGEN_VFRTRIG_VFSWR_M			BIT(0)
+#define PFHMC_ERRORDATA				0x00520500
+#define PFHMC_ERRORINFO				0x00520400
+#define GLINT_CTL				0x0016CC54
+#define GLINT_CTL_DIS_AUTOMASK_M		BIT(0)
+#define GLINT_CTL_ITR_GRAN_200_S		16
+#define GLINT_CTL_ITR_GRAN_200_M		ICE_M(0xF, 16)
+#define GLINT_CTL_ITR_GRAN_100_S		20
+#define GLINT_CTL_ITR_GRAN_100_M		ICE_M(0xF, 20)
+#define GLINT_CTL_ITR_GRAN_50_S			24
+#define GLINT_CTL_ITR_GRAN_50_M			ICE_M(0xF, 24)
+#define GLINT_CTL_ITR_GRAN_25_S			28
+#define GLINT_CTL_ITR_GRAN_25_M			ICE_M(0xF, 28)
+#define GLINT_DYN_CTL(_INT)			(0x00160000 + ((_INT) * 4))
+#define GLINT_DYN_CTL_INTENA_M			BIT(0)
+#define GLINT_DYN_CTL_CLEARPBA_M		BIT(1)
+#define GLINT_DYN_CTL_SWINT_TRIG_M		BIT(2)
+#define GLINT_DYN_CTL_ITR_INDX_S		3
+#define GLINT_DYN_CTL_ITR_INDX_M		ICE_M(0x3, 3)
+#define GLINT_DYN_CTL_INTERVAL_S		5
+#define GLINT_DYN_CTL_INTERVAL_M		ICE_M(0xFFF, 5)
+#define GLINT_DYN_CTL_SW_ITR_INDX_M		ICE_M(0x3, 25)
+#define GLINT_DYN_CTL_WB_ON_ITR_M		BIT(30)
+#define GLINT_DYN_CTL_INTENA_MSK_M		BIT(31)
+#define GLINT_ITR(_i, _INT)			(0x00154000 + ((_i) * 8192 + (_INT) * 4))
+#define GLINT_RATE(_INT)			(0x0015A000 + ((_INT) * 4))
+#define GLINT_RATE_INTRL_ENA_M			BIT(6)
+#define GLINT_VECT2FUNC(_INT)			(0x00162000 + ((_INT) * 4))
+#define GLINT_VECT2FUNC_VF_NUM_S		0
+#define GLINT_VECT2FUNC_VF_NUM_M		ICE_M(0xFF, 0)
+#define GLINT_VECT2FUNC_PF_NUM_S		12
+#define GLINT_VECT2FUNC_PF_NUM_M		ICE_M(0x7, 12)
+#define GLINT_VECT2FUNC_IS_PF_S			16
+#define GLINT_VECT2FUNC_IS_PF_M			BIT(16)
+#define PFINT_FW_CTL				0x0016C800
+#define PFINT_FW_CTL_MSIX_INDX_M		ICE_M(0x7FF, 0)
+#define PFINT_FW_CTL_ITR_INDX_S			11
+#define PFINT_FW_CTL_ITR_INDX_M			ICE_M(0x3, 11)
+#define PFINT_FW_CTL_CAUSE_ENA_M		BIT(30)
+#define PFINT_MBX_CTL				0x0016B280
+#define PFINT_MBX_CTL_MSIX_INDX_M		ICE_M(0x7FF, 0)
+#define PFINT_MBX_CTL_ITR_INDX_S		11
+#define PFINT_MBX_CTL_ITR_INDX_M		ICE_M(0x3, 11)
+#define PFINT_MBX_CTL_CAUSE_ENA_M		BIT(30)
+#define PFINT_OICR				0x0016CA00
+#define PFINT_OICR_ECC_ERR_M			BIT(16)
+#define PFINT_OICR_MAL_DETECT_M			BIT(19)
+#define PFINT_OICR_GRST_M			BIT(20)
+#define PFINT_OICR_PCI_EXCEPTION_M		BIT(21)
+#define PFINT_OICR_HMC_ERR_M			BIT(26)
+#define PFINT_OICR_PE_CRITERR_M			BIT(28)
+#define PFINT_OICR_VFLR_M			BIT(29)
+#define PFINT_OICR_SWINT_M			BIT(31)
+#define PFINT_OICR_CTL				0x0016CA80
+#define PFINT_OICR_CTL_MSIX_INDX_M		ICE_M(0x7FF, 0)
+#define PFINT_OICR_CTL_ITR_INDX_S		11
+#define PFINT_OICR_CTL_ITR_INDX_M		ICE_M(0x3, 11)
+#define PFINT_OICR_CTL_CAUSE_ENA_M		BIT(30)
+#define PFINT_OICR_ENA				0x0016C900
+#define QINT_RQCTL(_QRX)			(0x00150000 + ((_QRX) * 4))
+#define QINT_RQCTL_MSIX_INDX_S			0
+#define QINT_RQCTL_MSIX_INDX_M			ICE_M(0x7FF, 0)
+#define QINT_RQCTL_ITR_INDX_S			11
+#define QINT_RQCTL_ITR_INDX_M			ICE_M(0x3, 11)
+#define QINT_RQCTL_CAUSE_ENA_M			BIT(30)
+#define QINT_TQCTL(_DBQM)			(0x00140000 + ((_DBQM) * 4))
+#define QINT_TQCTL_MSIX_INDX_S			0
+#define QINT_TQCTL_MSIX_INDX_M			ICE_M(0x7FF, 0)
+#define QINT_TQCTL_ITR_INDX_S			11
+#define QINT_TQCTL_ITR_INDX_M			ICE_M(0x3, 11)
+#define QINT_TQCTL_CAUSE_ENA_M			BIT(30)
+#define VPINT_ALLOC(_VF)			(0x001D1000 + ((_VF) * 4))
+#define VPINT_ALLOC_FIRST_S			0
+#define VPINT_ALLOC_FIRST_M			ICE_M(0x7FF, 0)
+#define VPINT_ALLOC_LAST_S			12
+#define VPINT_ALLOC_LAST_M			ICE_M(0x7FF, 12)
+#define VPINT_ALLOC_VALID_M			BIT(31)
+#define VPINT_ALLOC_PCI(_VF)			(0x0009D000 + ((_VF) * 4))
+#define VPINT_ALLOC_PCI_FIRST_S			0
+#define VPINT_ALLOC_PCI_FIRST_M			ICE_M(0x7FF, 0)
+#define VPINT_ALLOC_PCI_LAST_S			12
+#define VPINT_ALLOC_PCI_LAST_M			ICE_M(0x7FF, 12)
+#define VPINT_ALLOC_PCI_VALID_M			BIT(31)
+#define VPINT_MBX_CTL(_VSI)			(0x0016A000 + ((_VSI) * 4))
+#define VPINT_MBX_CTL_CAUSE_ENA_M		BIT(30)
+#define GLLAN_RCTL_0				0x002941F8
+#define QRX_CONTEXT(_i, _QRX)			(0x00280000 + ((_i) * 8192 + (_QRX) * 4))
+#define QRX_CTRL(_QRX)				(0x00120000 + ((_QRX) * 4))
+#define QRX_CTRL_MAX_INDEX			2047
+#define QRX_CTRL_QENA_REQ_S			0
+#define QRX_CTRL_QENA_REQ_M			BIT(0)
+#define QRX_CTRL_QENA_STAT_S			2
+#define QRX_CTRL_QENA_STAT_M			BIT(2)
+#define QRX_ITR(_QRX)				(0x00292000 + ((_QRX) * 4))
+#define QRX_TAIL(_QRX)				(0x00290000 + ((_QRX) * 4))
+#define QRX_TAIL_MAX_INDEX			2047
+#define QRX_TAIL_TAIL_S				0
+#define QRX_TAIL_TAIL_M				ICE_M(0x1FFF, 0)
+#define VPLAN_RX_QBASE(_VF)			(0x00072000 + ((_VF) * 4))
+#define VPLAN_RX_QBASE_VFFIRSTQ_S		0
+#define VPLAN_RX_QBASE_VFFIRSTQ_M		ICE_M(0x7FF, 0)
+#define VPLAN_RX_QBASE_VFNUMQ_S			16
+#define VPLAN_RX_QBASE_VFNUMQ_M			ICE_M(0xFF, 16)
+#define VPLAN_RXQ_MAPENA(_VF)			(0x00073000 + ((_VF) * 4))
+#define VPLAN_RXQ_MAPENA_RX_ENA_M		BIT(0)
+#define VPLAN_TX_QBASE(_VF)			(0x001D1800 + ((_VF) * 4))
+#define VPLAN_TX_QBASE_VFFIRSTQ_S		0
+#define VPLAN_TX_QBASE_VFFIRSTQ_M		ICE_M(0x3FFF, 0)
+#define VPLAN_TX_QBASE_VFNUMQ_S			16
+#define VPLAN_TX_QBASE_VFNUMQ_M			ICE_M(0xFF, 16)
+#define VPLAN_TXQ_MAPENA(_VF)			(0x00073800 + ((_VF) * 4))
+#define VPLAN_TXQ_MAPENA_TX_ENA_M		BIT(0)
+#define PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA(_i)	(0x001E36E0 + ((_i) * 32))
+#define PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_MAX_INDEX 8
+#define PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_HSEC_CTL_TX_PAUSE_QUANTA_M ICE_M(0xFFFF, 0)
+#define PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(_i) (0x001E3800 + ((_i) * 32))
+#define PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_M ICE_M(0xFFFF, 0)
+#define GL_MDCK_TX_TDPU				0x00049348
+#define GL_MDCK_TX_TDPU_RCU_ANTISPOOF_ITR_DIS_M BIT(1)
+#define GL_MDET_RX				0x00294C00
+#define GL_MDET_RX_QNUM_S			0
+#define GL_MDET_RX_QNUM_M			ICE_M(0x7FFF, 0)
+#define GL_MDET_RX_VF_NUM_S			15
+#define GL_MDET_RX_VF_NUM_M			ICE_M(0xFF, 15)
+#define GL_MDET_RX_PF_NUM_S			23
+#define GL_MDET_RX_PF_NUM_M			ICE_M(0x7, 23)
+#define GL_MDET_RX_MAL_TYPE_S			26
+#define GL_MDET_RX_MAL_TYPE_M			ICE_M(0x1F, 26)
+#define GL_MDET_RX_VALID_M			BIT(31)
+#define GL_MDET_TX_PQM				0x002D2E00
+#define GL_MDET_TX_PQM_PF_NUM_S			0
+#define GL_MDET_TX_PQM_PF_NUM_M			ICE_M(0x7, 0)
+#define GL_MDET_TX_PQM_VF_NUM_S			4
+#define GL_MDET_TX_PQM_VF_NUM_M			ICE_M(0xFF, 4)
+#define GL_MDET_TX_PQM_QNUM_S			12
+#define GL_MDET_TX_PQM_QNUM_M			ICE_M(0x3FFF, 12)
+#define GL_MDET_TX_PQM_MAL_TYPE_S		26
+#define GL_MDET_TX_PQM_MAL_TYPE_M		ICE_M(0x1F, 26)
+#define GL_MDET_TX_PQM_VALID_M			BIT(31)
+#define GL_MDET_TX_TCLAN			0x000FC068
+#define GL_MDET_TX_TCLAN_QNUM_S			0
+#define GL_MDET_TX_TCLAN_QNUM_M			ICE_M(0x7FFF, 0)
+#define GL_MDET_TX_TCLAN_VF_NUM_S		15
+#define GL_MDET_TX_TCLAN_VF_NUM_M		ICE_M(0xFF, 15)
+#define GL_MDET_TX_TCLAN_PF_NUM_S		23
+#define GL_MDET_TX_TCLAN_PF_NUM_M		ICE_M(0x7, 23)
+#define GL_MDET_TX_TCLAN_MAL_TYPE_S		26
+#define GL_MDET_TX_TCLAN_MAL_TYPE_M		ICE_M(0x1F, 26)
+#define GL_MDET_TX_TCLAN_VALID_M		BIT(31)
+#define PF_MDET_RX				0x00294280
+#define PF_MDET_RX_VALID_M			BIT(0)
+#define PF_MDET_TX_PQM				0x002D2C80
+#define PF_MDET_TX_PQM_VALID_M			BIT(0)
+#define PF_MDET_TX_TCLAN			0x000FC000
+#define PF_MDET_TX_TCLAN_VALID_M		BIT(0)
+#define VP_MDET_RX(_VF)				(0x00294400 + ((_VF) * 4))
+#define VP_MDET_RX_VALID_M			BIT(0)
+#define VP_MDET_TX_PQM(_VF)			(0x002D2000 + ((_VF) * 4))
+#define VP_MDET_TX_PQM_VALID_M			BIT(0)
+#define VP_MDET_TX_TCLAN(_VF)			(0x000FB800 + ((_VF) * 4))
+#define VP_MDET_TX_TCLAN_VALID_M		BIT(0)
+#define VP_MDET_TX_TDPU(_VF)			(0x00040000 + ((_VF) * 4))
+#define VP_MDET_TX_TDPU_VALID_M			BIT(0)
+#define GLNVM_FLA				0x000B6108
+#define GLNVM_FLA_LOCKED_M			BIT(6)
+#define GLNVM_GENS				0x000B6100
+#define GLNVM_GENS_SR_SIZE_S			5
+#define GLNVM_GENS_SR_SIZE_M			ICE_M(0x7, 5)
+#define GLNVM_ULD				0x000B6008
+#define GLNVM_ULD_PCIER_DONE_M			BIT(0)
+#define GLNVM_ULD_PCIER_DONE_1_M		BIT(1)
+#define GLNVM_ULD_CORER_DONE_M			BIT(3)
+#define GLNVM_ULD_GLOBR_DONE_M			BIT(4)
+#define GLNVM_ULD_POR_DONE_M			BIT(5)
+#define GLNVM_ULD_POR_DONE_1_M			BIT(8)
+#define GLNVM_ULD_PCIER_DONE_2_M		BIT(9)
+#define GLNVM_ULD_PE_DONE_M			BIT(10)
+#define GLPCI_CNF2				0x000BE004
+#define GLPCI_CNF2_CACHELINE_SIZE_M		BIT(1)
+#define PF_FUNC_RID				0x0009E880
+#define PF_FUNC_RID_FUNC_NUM_S			0
+#define PF_FUNC_RID_FUNC_NUM_M			ICE_M(0x7, 0)
+#define PF_PCI_CIAA				0x0009E580
+#define PF_PCI_CIAA_VF_NUM_S			12
+#define PF_PCI_CIAD				0x0009E500
+#define GL_PWR_MODE_CTL				0x000B820C
+#define GL_PWR_MODE_CTL_CAR_MAX_BW_S		30
+#define GL_PWR_MODE_CTL_CAR_MAX_BW_M		ICE_M(0x3, 30)
+#define GLQF_FD_CNT				0x00460018
+#define GLQF_FD_CNT_FD_BCNT_S			16
+#define GLQF_FD_CNT_FD_BCNT_M			ICE_M(0x7FFF, 16)
+#define GLQF_FD_SIZE				0x00460010
+#define GLQF_FD_SIZE_FD_GSIZE_S			0
+#define GLQF_FD_SIZE_FD_GSIZE_M			ICE_M(0x7FFF, 0)
+#define GLQF_FD_SIZE_FD_BSIZE_S			16
+#define GLQF_FD_SIZE_FD_BSIZE_M			ICE_M(0x7FFF, 16)
+#define GLQF_FDINSET(_i, _j)			(0x00412000 + ((_i) * 4 + (_j) * 512))
+#define GLQF_FDMASK_SEL(_i)			(0x00410400 + ((_i) * 4))
+#define GLQF_FDSWAP(_i, _j)			(0x00413000 + ((_i) * 4 + (_j) * 512))
+#define PFQF_FD_ENA				0x0043A000
+#define PFQF_FD_ENA_FD_ENA_M			BIT(0)
+#define PFQF_FD_SIZE				0x00460100
+#define GLDCB_RTCTQ_RXQNUM_S			0
+#define GLDCB_RTCTQ_RXQNUM_M			ICE_M(0x7FF, 0)
+#define GLPRT_BPRCL(_i)				(0x00381380 + ((_i) * 8))
+#define GLPRT_BPTCL(_i)				(0x00381240 + ((_i) * 8))
+#define GLPRT_CRCERRS(_i)			(0x00380100 + ((_i) * 8))
+#define GLPRT_GORCL(_i)				(0x00380000 + ((_i) * 8))
+#define GLPRT_GOTCL(_i)				(0x00380B40 + ((_i) * 8))
+#define GLPRT_ILLERRC(_i)			(0x003801C0 + ((_i) * 8))
+#define GLPRT_LXOFFRXC(_i)			(0x003802C0 + ((_i) * 8))
+#define GLPRT_LXOFFTXC(_i)			(0x00381180 + ((_i) * 8))
+#define GLPRT_LXONRXC(_i)			(0x00380280 + ((_i) * 8))
+#define GLPRT_LXONTXC(_i)			(0x00381140 + ((_i) * 8))
+#define GLPRT_MLFC(_i)				(0x00380040 + ((_i) * 8))
+#define GLPRT_MPRCL(_i)				(0x00381340 + ((_i) * 8))
+#define GLPRT_MPTCL(_i)				(0x00381200 + ((_i) * 8))
+#define GLPRT_MRFC(_i)				(0x00380080 + ((_i) * 8))
+#define GLPRT_PRC1023L(_i)			(0x00380A00 + ((_i) * 8))
+#define GLPRT_PRC127L(_i)			(0x00380940 + ((_i) * 8))
+#define GLPRT_PRC1522L(_i)			(0x00380A40 + ((_i) * 8))
+#define GLPRT_PRC255L(_i)			(0x00380980 + ((_i) * 8))
+#define GLPRT_PRC511L(_i)			(0x003809C0 + ((_i) * 8))
+#define GLPRT_PRC64L(_i)			(0x00380900 + ((_i) * 8))
+#define GLPRT_PRC9522L(_i)			(0x00380A80 + ((_i) * 8))
+#define GLPRT_PTC1023L(_i)			(0x00380C80 + ((_i) * 8))
+#define GLPRT_PTC127L(_i)			(0x00380BC0 + ((_i) * 8))
+#define GLPRT_PTC1522L(_i)			(0x00380CC0 + ((_i) * 8))
+#define GLPRT_PTC255L(_i)			(0x00380C00 + ((_i) * 8))
+#define GLPRT_PTC511L(_i)			(0x00380C40 + ((_i) * 8))
+#define GLPRT_PTC64L(_i)			(0x00380B80 + ((_i) * 8))
+#define GLPRT_PTC9522L(_i)			(0x00380D00 + ((_i) * 8))
+#define GLPRT_PXOFFRXC(_i, _j)			(0x00380500 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_PXOFFTXC(_i, _j)			(0x00380F40 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_PXONRXC(_i, _j)			(0x00380300 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_PXONTXC(_i, _j)			(0x00380D40 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_RFC(_i)				(0x00380AC0 + ((_i) * 8))
+#define GLPRT_RJC(_i)				(0x00380B00 + ((_i) * 8))
+#define GLPRT_RLEC(_i)				(0x00380140 + ((_i) * 8))
+#define GLPRT_ROC(_i)				(0x00380240 + ((_i) * 8))
+#define GLPRT_RUC(_i)				(0x00380200 + ((_i) * 8))
+#define GLPRT_RXON2OFFCNT(_i, _j)		(0x00380700 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_TDOLD(_i)				(0x00381280 + ((_i) * 8))
+#define GLPRT_UPRCL(_i)				(0x00381300 + ((_i) * 8))
+#define GLPRT_UPTCL(_i)				(0x003811C0 + ((_i) * 8))
+#define GLSTAT_FD_CNT0L(_i)			(0x003A0000 + ((_i) * 8))
+#define GLV_BPRCL(_i)				(0x003B6000 + ((_i) * 8))
+#define GLV_BPTCL(_i)				(0x0030E000 + ((_i) * 8))
+#define GLV_GORCL(_i)				(0x003B0000 + ((_i) * 8))
+#define GLV_GOTCL(_i)				(0x00300000 + ((_i) * 8))
+#define GLV_MPRCL(_i)				(0x003B4000 + ((_i) * 8))
+#define GLV_MPTCL(_i)				(0x0030C000 + ((_i) * 8))
+#define GLV_RDPC(_i)				(0x00294C04 + ((_i) * 4))
+#define GLV_TEPC(_VSI)				(0x00312000 + ((_VSI) * 4))
+#define GLV_UPRCL(_i)				(0x003B2000 + ((_i) * 8))
+#define GLV_UPTCL(_i)				(0x0030A000 + ((_i) * 8))
+#define PRTRPB_RDPC				0x000AC260
+#define VSIQF_FD_CNT(_VSI)			(0x00464000 + ((_VSI) * 4))
+#define VSIQF_FD_CNT_FD_GCNT_S			0
+#define VSIQF_FD_CNT_FD_GCNT_M			ICE_M(0x3FFF, 0)
+#define VSIQF_HKEY_MAX_INDEX			12
+#define VSIQF_HLUT_MAX_INDEX			15
+#define PFPM_APM				0x000B8080
+#define PFPM_APM_APME_M				BIT(0)
+#define PFPM_WUFC				0x0009DC00
+#define PFPM_WUFC_MAG_M				BIT(1)
+#define PFPM_WUS				0x0009DB80
+#define PFPM_WUS_LNKC_M				BIT(0)
+#define PFPM_WUS_MAG_M				BIT(1)
+#define PFPM_WUS_MNG_M				BIT(3)
+#define PFPM_WUS_FW_RST_WK_M			BIT(31)
+#define VFINT_DYN_CTLN(_i)			(0x00003800 + ((_i) * 4))
+#define VFINT_DYN_CTLN_CLEARPBA_M		BIT(1)
+
+#endif /* _ICE_HW_AUTOGEN_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
new file mode 100644
index 000000000..847e1ef8e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -0,0 +1,946 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_LAN_TX_RX_H_
+#define _ICE_LAN_TX_RX_H_
+
+union ice_32byte_rx_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+			/* bit 0 of hdr_addr is DD bit */
+		__le64 rsvd1;
+		__le64 rsvd2;
+	} read;
+	struct {
+		struct {
+			struct {
+				__le16 mirroring_status;
+				__le16 l2tag1;
+			} lo_dword;
+			union {
+				__le32 rss; /* RSS Hash */
+				__le32 fd_id; /* Flow Director filter ID */
+			} hi_dword;
+		} qword0;
+		struct {
+			/* status/error/PTYPE/length */
+			__le64 status_error_len;
+		} qword1;
+		struct {
+			__le16 ext_status; /* extended status */
+			__le16 rsvd;
+			__le16 l2tag2_1;
+			__le16 l2tag2_2;
+		} qword2;
+		struct {
+			__le32 reserved;
+			__le32 fd_id;
+		} qword3;
+	} wb; /* writeback */
+};
+
+struct ice_fltr_desc {
+	__le64 qidx_compq_space_stat;
+	__le64 dtype_cmd_vsi_fdid;
+};
+
+#define ICE_FXD_FLTR_QW0_QINDEX_S	0
+#define ICE_FXD_FLTR_QW0_QINDEX_M	(0x7FFULL << ICE_FXD_FLTR_QW0_QINDEX_S)
+#define ICE_FXD_FLTR_QW0_COMP_Q_S	11
+#define ICE_FXD_FLTR_QW0_COMP_Q_M	BIT_ULL(ICE_FXD_FLTR_QW0_COMP_Q_S)
+#define ICE_FXD_FLTR_QW0_COMP_Q_ZERO	0x0ULL
+
+#define ICE_FXD_FLTR_QW0_COMP_REPORT_S	12
+#define ICE_FXD_FLTR_QW0_COMP_REPORT_M	\
+				(0x3ULL << ICE_FXD_FLTR_QW0_COMP_REPORT_S)
+#define ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL	0x1ULL
+
+#define ICE_FXD_FLTR_QW0_FD_SPACE_S	14
+#define ICE_FXD_FLTR_QW0_FD_SPACE_M	(0x3ULL << ICE_FXD_FLTR_QW0_FD_SPACE_S)
+#define ICE_FXD_FLTR_QW0_FD_SPACE_GUAR_BEST		0x2ULL
+
+#define ICE_FXD_FLTR_QW0_STAT_CNT_S	16
+#define ICE_FXD_FLTR_QW0_STAT_CNT_M	\
+				(0x1FFFULL << ICE_FXD_FLTR_QW0_STAT_CNT_S)
+#define ICE_FXD_FLTR_QW0_STAT_ENA_S	29
+#define ICE_FXD_FLTR_QW0_STAT_ENA_M	(0x3ULL << ICE_FXD_FLTR_QW0_STAT_ENA_S)
+#define ICE_FXD_FLTR_QW0_STAT_ENA_PKTS		0x1ULL
+
+#define ICE_FXD_FLTR_QW0_EVICT_ENA_S	31
+#define ICE_FXD_FLTR_QW0_EVICT_ENA_M	BIT_ULL(ICE_FXD_FLTR_QW0_EVICT_ENA_S)
+#define ICE_FXD_FLTR_QW0_EVICT_ENA_FALSE	0x0ULL
+#define ICE_FXD_FLTR_QW0_EVICT_ENA_TRUE		0x1ULL
+
+#define ICE_FXD_FLTR_QW0_TO_Q_S		32
+#define ICE_FXD_FLTR_QW0_TO_Q_M		(0x7ULL << ICE_FXD_FLTR_QW0_TO_Q_S)
+#define ICE_FXD_FLTR_QW0_TO_Q_EQUALS_QINDEX	0x0ULL
+
+#define ICE_FXD_FLTR_QW0_TO_Q_PRI_S	35
+#define ICE_FXD_FLTR_QW0_TO_Q_PRI_M	(0x7ULL << ICE_FXD_FLTR_QW0_TO_Q_PRI_S)
+#define ICE_FXD_FLTR_QW0_TO_Q_PRIO1	0x1ULL
+
+#define ICE_FXD_FLTR_QW0_DPU_RECIPE_S	38
+#define ICE_FXD_FLTR_QW0_DPU_RECIPE_M	\
+			(0x3ULL << ICE_FXD_FLTR_QW0_DPU_RECIPE_S)
+#define ICE_FXD_FLTR_QW0_DPU_RECIPE_DFLT	0x0ULL
+
+#define ICE_FXD_FLTR_QW0_DROP_S		40
+#define ICE_FXD_FLTR_QW0_DROP_M		BIT_ULL(ICE_FXD_FLTR_QW0_DROP_S)
+#define ICE_FXD_FLTR_QW0_DROP_NO	0x0ULL
+#define ICE_FXD_FLTR_QW0_DROP_YES	0x1ULL
+
+#define ICE_FXD_FLTR_QW0_FLEX_PRI_S	41
+#define ICE_FXD_FLTR_QW0_FLEX_PRI_M	(0x7ULL << ICE_FXD_FLTR_QW0_FLEX_PRI_S)
+#define ICE_FXD_FLTR_QW0_FLEX_PRI_NONE	0x0ULL
+
+#define ICE_FXD_FLTR_QW0_FLEX_MDID_S	44
+#define ICE_FXD_FLTR_QW0_FLEX_MDID_M	(0xFULL << ICE_FXD_FLTR_QW0_FLEX_MDID_S)
+#define ICE_FXD_FLTR_QW0_FLEX_MDID0	0x0ULL
+
+#define ICE_FXD_FLTR_QW0_FLEX_VAL_S	48
+#define ICE_FXD_FLTR_QW0_FLEX_VAL_M	\
+				(0xFFFFULL << ICE_FXD_FLTR_QW0_FLEX_VAL_S)
+#define ICE_FXD_FLTR_QW0_FLEX_VAL0	0x0ULL
+
+#define ICE_FXD_FLTR_QW1_DTYPE_S	0
+#define ICE_FXD_FLTR_QW1_DTYPE_M	(0xFULL << ICE_FXD_FLTR_QW1_DTYPE_S)
+#define ICE_FXD_FLTR_QW1_PCMD_S		4
+#define ICE_FXD_FLTR_QW1_PCMD_M		BIT_ULL(ICE_FXD_FLTR_QW1_PCMD_S)
+#define ICE_FXD_FLTR_QW1_PCMD_ADD	0x0ULL
+#define ICE_FXD_FLTR_QW1_PCMD_REMOVE	0x1ULL
+
+#define ICE_FXD_FLTR_QW1_PROF_PRI_S	5
+#define ICE_FXD_FLTR_QW1_PROF_PRI_M	(0x7ULL << ICE_FXD_FLTR_QW1_PROF_PRI_S)
+#define ICE_FXD_FLTR_QW1_PROF_PRIO_ZERO	0x0ULL
+
+#define ICE_FXD_FLTR_QW1_PROF_S		8
+#define ICE_FXD_FLTR_QW1_PROF_M		(0x3FULL << ICE_FXD_FLTR_QW1_PROF_S)
+#define ICE_FXD_FLTR_QW1_PROF_ZERO	0x0ULL
+
+#define ICE_FXD_FLTR_QW1_FD_VSI_S	14
+#define ICE_FXD_FLTR_QW1_FD_VSI_M	(0x3FFULL << ICE_FXD_FLTR_QW1_FD_VSI_S)
+#define ICE_FXD_FLTR_QW1_SWAP_S		24
+#define ICE_FXD_FLTR_QW1_SWAP_M		BIT_ULL(ICE_FXD_FLTR_QW1_SWAP_S)
+#define ICE_FXD_FLTR_QW1_SWAP_NOT_SET	0x0ULL
+#define ICE_FXD_FLTR_QW1_SWAP_SET	0x1ULL
+
+#define ICE_FXD_FLTR_QW1_FDID_PRI_S	25
+#define ICE_FXD_FLTR_QW1_FDID_PRI_M	(0x7ULL << ICE_FXD_FLTR_QW1_FDID_PRI_S)
+#define ICE_FXD_FLTR_QW1_FDID_PRI_ONE	0x1ULL
+
+#define ICE_FXD_FLTR_QW1_FDID_MDID_S	28
+#define ICE_FXD_FLTR_QW1_FDID_MDID_M	(0xFULL << ICE_FXD_FLTR_QW1_FDID_MDID_S)
+#define ICE_FXD_FLTR_QW1_FDID_MDID_FD	0x05ULL
+
+#define ICE_FXD_FLTR_QW1_FDID_S		32
+#define ICE_FXD_FLTR_QW1_FDID_M		\
+			(0xFFFFFFFFULL << ICE_FXD_FLTR_QW1_FDID_S)
+#define ICE_FXD_FLTR_QW1_FDID_ZERO	0x0ULL
+
+struct ice_rx_ptype_decoded {
+	u32 ptype:10;
+	u32 known:1;
+	u32 outer_ip:1;
+	u32 outer_ip_ver:2;
+	u32 outer_frag:1;
+	u32 tunnel_type:3;
+	u32 tunnel_end_prot:2;
+	u32 tunnel_end_frag:1;
+	u32 inner_prot:4;
+	u32 payload_layer:3;
+};
+
+enum ice_rx_ptype_outer_ip {
+	ICE_RX_PTYPE_OUTER_L2	= 0,
+	ICE_RX_PTYPE_OUTER_IP	= 1,
+};
+
+enum ice_rx_ptype_outer_ip_ver {
+	ICE_RX_PTYPE_OUTER_NONE	= 0,
+	ICE_RX_PTYPE_OUTER_IPV4	= 1,
+	ICE_RX_PTYPE_OUTER_IPV6	= 2,
+};
+
+enum ice_rx_ptype_outer_fragmented {
+	ICE_RX_PTYPE_NOT_FRAG	= 0,
+	ICE_RX_PTYPE_FRAG	= 1,
+};
+
+enum ice_rx_ptype_tunnel_type {
+	ICE_RX_PTYPE_TUNNEL_NONE		= 0,
+	ICE_RX_PTYPE_TUNNEL_IP_IP		= 1,
+	ICE_RX_PTYPE_TUNNEL_IP_GRENAT		= 2,
+	ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC	= 3,
+	ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN	= 4,
+};
+
+enum ice_rx_ptype_tunnel_end_prot {
+	ICE_RX_PTYPE_TUNNEL_END_NONE	= 0,
+	ICE_RX_PTYPE_TUNNEL_END_IPV4	= 1,
+	ICE_RX_PTYPE_TUNNEL_END_IPV6	= 2,
+};
+
+enum ice_rx_ptype_inner_prot {
+	ICE_RX_PTYPE_INNER_PROT_NONE		= 0,
+	ICE_RX_PTYPE_INNER_PROT_UDP		= 1,
+	ICE_RX_PTYPE_INNER_PROT_TCP		= 2,
+	ICE_RX_PTYPE_INNER_PROT_SCTP		= 3,
+	ICE_RX_PTYPE_INNER_PROT_ICMP		= 4,
+	ICE_RX_PTYPE_INNER_PROT_TIMESYNC	= 5,
+};
+
+enum ice_rx_ptype_payload_layer {
+	ICE_RX_PTYPE_PAYLOAD_LAYER_NONE	= 0,
+	ICE_RX_PTYPE_PAYLOAD_LAYER_PAY2	= 1,
+	ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3	= 2,
+	ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4	= 3,
+};
+
+/* Rx Flex Descriptor
+ * This descriptor is used instead of the legacy version descriptor when
+ * ice_rlan_ctx.adv_desc is set
+ */
+union ice_32b_rx_flex_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+				 /* bit 0 of hdr_addr is DD bit */
+		__le64 rsvd1;
+		__le64 rsvd2;
+	} read;
+	struct {
+		/* Qword 0 */
+		u8 rxdid; /* descriptor builder profile ID */
+		u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
+		__le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
+		__le16 pkt_len; /* [15:14] are reserved */
+		__le16 hdr_len_sph_flex_flags1; /* header=[10:0] */
+						/* sph=[11:11] */
+						/* ff1/ext=[15:12] */
+
+		/* Qword 1 */
+		__le16 status_error0;
+		__le16 l2tag1;
+		__le16 flex_meta0;
+		__le16 flex_meta1;
+
+		/* Qword 2 */
+		__le16 status_error1;
+		u8 flex_flags2;
+		u8 time_stamp_low;
+		__le16 l2tag2_1st;
+		__le16 l2tag2_2nd;
+
+		/* Qword 3 */
+		__le16 flex_meta2;
+		__le16 flex_meta3;
+		union {
+			struct {
+				__le16 flex_meta4;
+				__le16 flex_meta5;
+			} flex;
+			__le32 ts_high;
+		} flex_ts;
+	} wb; /* writeback */
+};
+
+/* Rx Flex Descriptor NIC Profile
+ * This descriptor corresponds to RxDID 2 which contains
+ * metadata fields for RSS, flow ID and timestamp info
+ */
+struct ice_32b_rx_flex_desc_nic {
+	/* Qword 0 */
+	u8 rxdid;
+	u8 mir_id_umb_cast;
+	__le16 ptype_flexi_flags0;
+	__le16 pkt_len;
+	__le16 hdr_len_sph_flex_flags1;
+
+	/* Qword 1 */
+	__le16 status_error0;
+	__le16 l2tag1;
+	__le32 rss_hash;
+
+	/* Qword 2 */
+	__le16 status_error1;
+	u8 flexi_flags2;
+	u8 ts_low;
+	__le16 l2tag2_1st;
+	__le16 l2tag2_2nd;
+
+	/* Qword 3 */
+	__le32 flow_id;
+	union {
+		struct {
+			__le16 vlan_id;
+			__le16 flow_id_ipv6;
+		} flex;
+		__le32 ts_high;
+	} flex_ts;
+};
+
+/* Receive Flex Descriptor profile IDs: There are a total
+ * of 64 profiles where profile IDs 0/1 are for legacy; and
+ * profiles 2-63 are flex profiles that can be programmed
+ * with a specific metadata (profile 7 reserved for HW)
+ */
+enum ice_rxdid {
+	ICE_RXDID_LEGACY_0		= 0,
+	ICE_RXDID_LEGACY_1		= 1,
+	ICE_RXDID_FLEX_NIC		= 2,
+	ICE_RXDID_FLEX_NIC_2		= 6,
+	ICE_RXDID_HW			= 7,
+	ICE_RXDID_LAST			= 63,
+};
+
+/* Receive Flex Descriptor Rx opcode values */
+#define ICE_RX_OPC_MDID		0x01
+
+/* Receive Descriptor MDID values that access packet flags */
+enum ice_flex_mdid_pkt_flags {
+	ICE_RX_MDID_PKT_FLAGS_15_0	= 20,
+	ICE_RX_MDID_PKT_FLAGS_31_16,
+	ICE_RX_MDID_PKT_FLAGS_47_32,
+	ICE_RX_MDID_PKT_FLAGS_63_48,
+};
+
+/* Receive Descriptor MDID values */
+enum ice_flex_rx_mdid {
+	ICE_RX_MDID_FLOW_ID_LOWER	= 5,
+	ICE_RX_MDID_FLOW_ID_HIGH,
+	ICE_RX_MDID_SRC_VSI		= 19,
+	ICE_RX_MDID_HASH_LOW		= 56,
+	ICE_RX_MDID_HASH_HIGH,
+};
+
+/* Rx/Tx Flag64 packet flag bits */
+enum ice_flg64_bits {
+	ICE_FLG_PKT_DSI		= 0,
+	ICE_FLG_EVLAN_x8100	= 14,
+	ICE_FLG_EVLAN_x9100,
+	ICE_FLG_VLAN_x8100,
+	ICE_FLG_TNL_MAC		= 22,
+	ICE_FLG_TNL_VLAN,
+	ICE_FLG_PKT_FRG,
+	ICE_FLG_FIN		= 32,
+	ICE_FLG_SYN,
+	ICE_FLG_RST,
+	ICE_FLG_TNL0		= 38,
+	ICE_FLG_TNL1,
+	ICE_FLG_TNL2,
+	ICE_FLG_UDP_GRE,
+	ICE_FLG_RSVD		= 63
+};
+
+/* for ice_32byte_rx_flex_desc.ptype_flexi_flags0 member */
+#define ICE_RX_FLEX_DESC_PTYPE_M	(0x3FF) /* 10-bits */
+
+/* for ice_32byte_rx_flex_desc.pkt_length member */
+#define ICE_RX_FLX_DESC_PKT_LEN_M	(0x3FFF) /* 14-bits */
+
+enum ice_rx_flex_desc_status_error_0_bits {
+	/* Note: These are predefined bit offsets */
+	ICE_RX_FLEX_DESC_STATUS0_DD_S = 0,
+	ICE_RX_FLEX_DESC_STATUS0_EOF_S,
+	ICE_RX_FLEX_DESC_STATUS0_HBO_S,
+	ICE_RX_FLEX_DESC_STATUS0_L3L4P_S,
+	ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S,
+	ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S,
+	ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S,
+	ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S,
+	ICE_RX_FLEX_DESC_STATUS0_LPBK_S,
+	ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S,
+	ICE_RX_FLEX_DESC_STATUS0_RXE_S,
+	ICE_RX_FLEX_DESC_STATUS0_CRCP_S,
+	ICE_RX_FLEX_DESC_STATUS0_RSS_VALID_S,
+	ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S,
+	ICE_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_S,
+	ICE_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_S,
+	ICE_RX_FLEX_DESC_STATUS0_LAST /* this entry must be last!!! */
+};
+
+enum ice_rx_flex_desc_status_error_1_bits {
+	/* Note: These are predefined bit offsets */
+	ICE_RX_FLEX_DESC_STATUS1_NAT_S = 4,
+	ICE_RX_FLEX_DESC_STATUS1_LAST /* this entry must be last!!! */
+};
+
+#define ICE_RXQ_CTX_SIZE_DWORDS		8
+#define ICE_RXQ_CTX_SZ			(ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32))
+#define ICE_TX_CMPLTNQ_CTX_SIZE_DWORDS	22
+#define ICE_TX_DRBELL_Q_CTX_SIZE_DWORDS	5
+#define GLTCLAN_CQ_CNTX(i, CQ)		(GLTCLAN_CQ_CNTX0(CQ) + ((i) * 0x0800))
+
+/* RLAN Rx queue context data
+ *
+ * The sizes of the variables may be larger than needed due to crossing byte
+ * boundaries. If we do not have the width of the variable set to the correct
+ * size then we could end up shifting bits off the top of the variable when the
+ * variable is at the top of a byte and crosses over into the next byte.
+ */
+struct ice_rlan_ctx {
+	u16 head;
+	u16 cpuid; /* bigger than needed, see above for reason */
+#define ICE_RLAN_BASE_S 7
+	u64 base;
+	u16 qlen;
+#define ICE_RLAN_CTX_DBUF_S 7
+	u16 dbuf; /* bigger than needed, see above for reason */
+#define ICE_RLAN_CTX_HBUF_S 6
+	u16 hbuf; /* bigger than needed, see above for reason */
+	u8 dtype;
+	u8 dsize;
+	u8 crcstrip;
+	u8 l2tsel;
+	u8 hsplit_0;
+	u8 hsplit_1;
+	u8 showiv;
+	u32 rxmax; /* bigger than needed, see above for reason */
+	u8 tphrdesc_ena;
+	u8 tphwdesc_ena;
+	u8 tphdata_ena;
+	u8 tphhead_ena;
+	u16 lrxqthresh; /* bigger than needed, see above for reason */
+	u8 prefena;	/* NOTE: normally must be set to 1 at init */
+};
+
+struct ice_ctx_ele {
+	u16 offset;
+	u16 size_of;
+	u16 width;
+	u16 lsb;
+};
+
+#define ICE_CTX_STORE(_struct, _ele, _width, _lsb) {	\
+	.offset = offsetof(struct _struct, _ele),	\
+	.size_of = sizeof_field(struct _struct, _ele),	\
+	.width = _width,				\
+	.lsb = _lsb,					\
+}
+
+/* for hsplit_0 field of Rx RLAN context */
+enum ice_rlan_ctx_rx_hsplit_0 {
+	ICE_RLAN_RX_HSPLIT_0_NO_SPLIT		= 0,
+	ICE_RLAN_RX_HSPLIT_0_SPLIT_L2		= 1,
+	ICE_RLAN_RX_HSPLIT_0_SPLIT_IP		= 2,
+	ICE_RLAN_RX_HSPLIT_0_SPLIT_TCP_UDP	= 4,
+	ICE_RLAN_RX_HSPLIT_0_SPLIT_SCTP		= 8,
+};
+
+/* for hsplit_1 field of Rx RLAN context */
+enum ice_rlan_ctx_rx_hsplit_1 {
+	ICE_RLAN_RX_HSPLIT_1_NO_SPLIT		= 0,
+	ICE_RLAN_RX_HSPLIT_1_SPLIT_L2		= 1,
+	ICE_RLAN_RX_HSPLIT_1_SPLIT_ALWAYS	= 2,
+};
+
+/* Tx Descriptor */
+struct ice_tx_desc {
+	__le64 buf_addr; /* Address of descriptor's data buf */
+	__le64 cmd_type_offset_bsz;
+};
+
+enum ice_tx_desc_dtype_value {
+	ICE_TX_DESC_DTYPE_DATA		= 0x0,
+	ICE_TX_DESC_DTYPE_CTX		= 0x1,
+	ICE_TX_DESC_DTYPE_FLTR_PROG	= 0x8,
+	/* DESC_DONE - HW has completed write-back of descriptor */
+	ICE_TX_DESC_DTYPE_DESC_DONE	= 0xF,
+};
+
+#define ICE_TXD_QW1_CMD_S	4
+#define ICE_TXD_QW1_CMD_M	(0xFFFUL << ICE_TXD_QW1_CMD_S)
+
+enum ice_tx_desc_cmd_bits {
+	ICE_TX_DESC_CMD_EOP			= 0x0001,
+	ICE_TX_DESC_CMD_RS			= 0x0002,
+	ICE_TX_DESC_CMD_IL2TAG1			= 0x0008,
+	ICE_TX_DESC_CMD_DUMMY			= 0x0010,
+	ICE_TX_DESC_CMD_IIPT_IPV6		= 0x0020,
+	ICE_TX_DESC_CMD_IIPT_IPV4		= 0x0040,
+	ICE_TX_DESC_CMD_IIPT_IPV4_CSUM		= 0x0060,
+	ICE_TX_DESC_CMD_L4T_EOFT_TCP		= 0x0100,
+	ICE_TX_DESC_CMD_L4T_EOFT_SCTP		= 0x0200,
+	ICE_TX_DESC_CMD_L4T_EOFT_UDP		= 0x0300,
+	ICE_TX_DESC_CMD_RE			= 0x0400,
+};
+
+#define ICE_TXD_QW1_OFFSET_S	16
+#define ICE_TXD_QW1_OFFSET_M	(0x3FFFFULL << ICE_TXD_QW1_OFFSET_S)
+
+enum ice_tx_desc_len_fields {
+	/* Note: These are predefined bit offsets */
+	ICE_TX_DESC_LEN_MACLEN_S	= 0, /* 7 BITS */
+	ICE_TX_DESC_LEN_IPLEN_S	= 7, /* 7 BITS */
+	ICE_TX_DESC_LEN_L4_LEN_S	= 14 /* 4 BITS */
+};
+
+#define ICE_TXD_QW1_MACLEN_M (0x7FUL << ICE_TX_DESC_LEN_MACLEN_S)
+#define ICE_TXD_QW1_IPLEN_M  (0x7FUL << ICE_TX_DESC_LEN_IPLEN_S)
+#define ICE_TXD_QW1_L4LEN_M  (0xFUL << ICE_TX_DESC_LEN_L4_LEN_S)
+
+/* Tx descriptor field limits in bytes */
+#define ICE_TXD_MACLEN_MAX ((ICE_TXD_QW1_MACLEN_M >> \
+			     ICE_TX_DESC_LEN_MACLEN_S) * ICE_BYTES_PER_WORD)
+#define ICE_TXD_IPLEN_MAX ((ICE_TXD_QW1_IPLEN_M >> \
+			    ICE_TX_DESC_LEN_IPLEN_S) * ICE_BYTES_PER_DWORD)
+#define ICE_TXD_L4LEN_MAX ((ICE_TXD_QW1_L4LEN_M >> \
+			    ICE_TX_DESC_LEN_L4_LEN_S) * ICE_BYTES_PER_DWORD)
+
+#define ICE_TXD_QW1_TX_BUF_SZ_S	34
+#define ICE_TXD_QW1_L2TAG1_S	48
+
+/* Context descriptors */
+struct ice_tx_ctx_desc {
+	__le32 tunneling_params;
+	__le16 l2tag2;
+	__le16 rsvd;
+	__le64 qw1;
+};
+
+#define ICE_TXD_CTX_QW1_CMD_S	4
+#define ICE_TXD_CTX_QW1_CMD_M	(0x7FUL << ICE_TXD_CTX_QW1_CMD_S)
+
+#define ICE_TXD_CTX_QW1_TSO_LEN_S	30
+#define ICE_TXD_CTX_QW1_TSO_LEN_M	\
+			(0x3FFFFULL << ICE_TXD_CTX_QW1_TSO_LEN_S)
+
+#define ICE_TXD_CTX_QW1_MSS_S	50
+#define ICE_TXD_CTX_MIN_MSS	64
+
+enum ice_tx_ctx_desc_cmd_bits {
+	ICE_TX_CTX_DESC_TSO		= 0x01,
+	ICE_TX_CTX_DESC_TSYN		= 0x02,
+	ICE_TX_CTX_DESC_IL2TAG2		= 0x04,
+	ICE_TX_CTX_DESC_IL2TAG2_IL2H	= 0x08,
+	ICE_TX_CTX_DESC_SWTCH_NOTAG	= 0x00,
+	ICE_TX_CTX_DESC_SWTCH_UPLINK	= 0x10,
+	ICE_TX_CTX_DESC_SWTCH_LOCAL	= 0x20,
+	ICE_TX_CTX_DESC_SWTCH_VSI	= 0x30,
+	ICE_TX_CTX_DESC_RESERVED	= 0x40
+};
+
+enum ice_tx_ctx_desc_eipt_offload {
+	ICE_TX_CTX_EIPT_NONE		= 0x0,
+	ICE_TX_CTX_EIPT_IPV6		= 0x1,
+	ICE_TX_CTX_EIPT_IPV4_NO_CSUM	= 0x2,
+	ICE_TX_CTX_EIPT_IPV4		= 0x3
+};
+
+#define ICE_TXD_CTX_QW0_EIPLEN_S	2
+
+#define ICE_TXD_CTX_QW0_L4TUNT_S	9
+
+#define ICE_TXD_CTX_UDP_TUNNELING	BIT_ULL(ICE_TXD_CTX_QW0_L4TUNT_S)
+#define ICE_TXD_CTX_GRE_TUNNELING	(0x2ULL << ICE_TXD_CTX_QW0_L4TUNT_S)
+
+#define ICE_TXD_CTX_QW0_NATLEN_S	12
+
+#define ICE_TXD_CTX_QW0_L4T_CS_S	23
+#define ICE_TXD_CTX_QW0_L4T_CS_M	BIT_ULL(ICE_TXD_CTX_QW0_L4T_CS_S)
+
+#define ICE_LAN_TXQ_MAX_QGRPS	127
+#define ICE_LAN_TXQ_MAX_QDIS	1023
+
+/* Tx queue context data
+ *
+ * The sizes of the variables may be larger than needed due to crossing byte
+ * boundaries. If we do not have the width of the variable set to the correct
+ * size then we could end up shifting bits off the top of the variable when the
+ * variable is at the top of a byte and crosses over into the next byte.
+ */
+struct ice_tlan_ctx {
+#define ICE_TLAN_CTX_BASE_S	7
+	u64 base;		/* base is defined in 128-byte units */
+	u8 port_num;
+	u16 cgd_num;		/* bigger than needed, see above for reason */
+	u8 pf_num;
+	u16 vmvf_num;
+	u8 vmvf_type;
+#define ICE_TLAN_CTX_VMVF_TYPE_VF	0
+#define ICE_TLAN_CTX_VMVF_TYPE_VMQ	1
+#define ICE_TLAN_CTX_VMVF_TYPE_PF	2
+	u16 src_vsi;
+	u8 tsyn_ena;
+	u8 internal_usage_flag;
+	u8 alt_vlan;
+	u16 cpuid;		/* bigger than needed, see above for reason */
+	u8 wb_mode;
+	u8 tphrd_desc;
+	u8 tphrd;
+	u8 tphwr_desc;
+	u16 cmpq_id;
+	u16 qnum_in_func;
+	u8 itr_notification_mode;
+	u8 adjust_prof_id;
+	u32 qlen;		/* bigger than needed, see above for reason */
+	u8 quanta_prof_idx;
+	u8 tso_ena;
+	u16 tso_qnum;
+	u8 legacy_int;
+	u8 drop_ena;
+	u8 cache_prof_idx;
+	u8 pkt_shaper_prof_idx;
+	u8 int_q_state;	/* width not needed - internal - DO NOT WRITE!!! */
+};
+
+/* macro to make the table lines short */
+#define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
+	{	PTYPE, \
+		1, \
+		ICE_RX_PTYPE_OUTER_##OUTER_IP, \
+		ICE_RX_PTYPE_OUTER_##OUTER_IP_VER, \
+		ICE_RX_PTYPE_##OUTER_FRAG, \
+		ICE_RX_PTYPE_TUNNEL_##T, \
+		ICE_RX_PTYPE_TUNNEL_END_##TE, \
+		ICE_RX_PTYPE_##TEF, \
+		ICE_RX_PTYPE_INNER_PROT_##I, \
+		ICE_RX_PTYPE_PAYLOAD_LAYER_##PL }
+
+#define ICE_PTT_UNUSED_ENTRY(PTYPE) { PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+/* shorter macros makes the table fit but are terse */
+#define ICE_RX_PTYPE_NOF		ICE_RX_PTYPE_NOT_FRAG
+#define ICE_RX_PTYPE_FRG		ICE_RX_PTYPE_FRAG
+
+/* Lookup table mapping the HW PTYPE to the bit field for decoding */
+static const struct ice_rx_ptype_decoded ice_ptype_lkup[] = {
+	/* L2 Packet types */
+	ICE_PTT_UNUSED_ENTRY(0),
+	ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	ICE_PTT_UNUSED_ENTRY(2),
+	ICE_PTT_UNUSED_ENTRY(3),
+	ICE_PTT_UNUSED_ENTRY(4),
+	ICE_PTT_UNUSED_ENTRY(5),
+	ICE_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+	ICE_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+	ICE_PTT_UNUSED_ENTRY(8),
+	ICE_PTT_UNUSED_ENTRY(9),
+	ICE_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+	ICE_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+	ICE_PTT_UNUSED_ENTRY(12),
+	ICE_PTT_UNUSED_ENTRY(13),
+	ICE_PTT_UNUSED_ENTRY(14),
+	ICE_PTT_UNUSED_ENTRY(15),
+	ICE_PTT_UNUSED_ENTRY(16),
+	ICE_PTT_UNUSED_ENTRY(17),
+	ICE_PTT_UNUSED_ENTRY(18),
+	ICE_PTT_UNUSED_ENTRY(19),
+	ICE_PTT_UNUSED_ENTRY(20),
+	ICE_PTT_UNUSED_ENTRY(21),
+
+	/* Non Tunneled IPv4 */
+	ICE_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3),
+	ICE_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3),
+	ICE_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(25),
+	ICE_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP,  PAY4),
+	ICE_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4),
+	ICE_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+	/* IPv4 --> IPv4 */
+	ICE_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+	ICE_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+	ICE_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(32),
+	ICE_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP,  PAY4),
+	ICE_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+	ICE_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> IPv6 */
+	ICE_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+	ICE_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+	ICE_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(39),
+	ICE_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP,  PAY4),
+	ICE_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+	ICE_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT */
+	ICE_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 --> GRE/NAT --> IPv4 */
+	ICE_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+	ICE_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+	ICE_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(47),
+	ICE_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP,  PAY4),
+	ICE_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+	ICE_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> IPv6 */
+	ICE_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+	ICE_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+	ICE_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(54),
+	ICE_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP,  PAY4),
+	ICE_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+	ICE_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> MAC */
+	ICE_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 --> GRE/NAT --> MAC --> IPv4 */
+	ICE_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+	ICE_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+	ICE_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(62),
+	ICE_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP,  PAY4),
+	ICE_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+	ICE_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT -> MAC --> IPv6 */
+	ICE_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+	ICE_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+	ICE_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(69),
+	ICE_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP,  PAY4),
+	ICE_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+	ICE_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> MAC/VLAN */
+	ICE_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */
+	ICE_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+	ICE_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+	ICE_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(77),
+	ICE_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP,  PAY4),
+	ICE_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+	ICE_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */
+	ICE_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+	ICE_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+	ICE_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(84),
+	ICE_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP,  PAY4),
+	ICE_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+	ICE_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+	/* Non Tunneled IPv6 */
+	ICE_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3),
+	ICE_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3),
+	ICE_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(91),
+	ICE_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP,  PAY4),
+	ICE_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4),
+	ICE_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+	/* IPv6 --> IPv4 */
+	ICE_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+	ICE_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+	ICE_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(98),
+	ICE_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP,  PAY4),
+	ICE_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+	ICE_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> IPv6 */
+	ICE_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+	ICE_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+	ICE_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(105),
+	ICE_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP,  PAY4),
+	ICE_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+	ICE_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT */
+	ICE_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> IPv4 */
+	ICE_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+	ICE_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+	ICE_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(113),
+	ICE_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP,  PAY4),
+	ICE_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+	ICE_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> IPv6 */
+	ICE_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+	ICE_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+	ICE_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(120),
+	ICE_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP,  PAY4),
+	ICE_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+	ICE_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC */
+	ICE_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> MAC -> IPv4 */
+	ICE_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+	ICE_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+	ICE_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(128),
+	ICE_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP,  PAY4),
+	ICE_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+	ICE_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC -> IPv6 */
+	ICE_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+	ICE_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+	ICE_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(135),
+	ICE_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP,  PAY4),
+	ICE_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+	ICE_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN */
+	ICE_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */
+	ICE_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+	ICE_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+	ICE_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(143),
+	ICE_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP,  PAY4),
+	ICE_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+	ICE_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */
+	ICE_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+	ICE_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+	ICE_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(150),
+	ICE_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP,  PAY4),
+	ICE_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+	ICE_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+	/* unused entries */
+	ICE_PTT_UNUSED_ENTRY(154),
+	ICE_PTT_UNUSED_ENTRY(155),
+	ICE_PTT_UNUSED_ENTRY(156),
+	ICE_PTT_UNUSED_ENTRY(157),
+	ICE_PTT_UNUSED_ENTRY(158),
+	ICE_PTT_UNUSED_ENTRY(159),
+
+	ICE_PTT_UNUSED_ENTRY(160),
+	ICE_PTT_UNUSED_ENTRY(161),
+	ICE_PTT_UNUSED_ENTRY(162),
+	ICE_PTT_UNUSED_ENTRY(163),
+	ICE_PTT_UNUSED_ENTRY(164),
+	ICE_PTT_UNUSED_ENTRY(165),
+	ICE_PTT_UNUSED_ENTRY(166),
+	ICE_PTT_UNUSED_ENTRY(167),
+	ICE_PTT_UNUSED_ENTRY(168),
+	ICE_PTT_UNUSED_ENTRY(169),
+
+	ICE_PTT_UNUSED_ENTRY(170),
+	ICE_PTT_UNUSED_ENTRY(171),
+	ICE_PTT_UNUSED_ENTRY(172),
+	ICE_PTT_UNUSED_ENTRY(173),
+	ICE_PTT_UNUSED_ENTRY(174),
+	ICE_PTT_UNUSED_ENTRY(175),
+	ICE_PTT_UNUSED_ENTRY(176),
+	ICE_PTT_UNUSED_ENTRY(177),
+	ICE_PTT_UNUSED_ENTRY(178),
+	ICE_PTT_UNUSED_ENTRY(179),
+
+	ICE_PTT_UNUSED_ENTRY(180),
+	ICE_PTT_UNUSED_ENTRY(181),
+	ICE_PTT_UNUSED_ENTRY(182),
+	ICE_PTT_UNUSED_ENTRY(183),
+	ICE_PTT_UNUSED_ENTRY(184),
+	ICE_PTT_UNUSED_ENTRY(185),
+	ICE_PTT_UNUSED_ENTRY(186),
+	ICE_PTT_UNUSED_ENTRY(187),
+	ICE_PTT_UNUSED_ENTRY(188),
+	ICE_PTT_UNUSED_ENTRY(189),
+
+	ICE_PTT_UNUSED_ENTRY(190),
+	ICE_PTT_UNUSED_ENTRY(191),
+	ICE_PTT_UNUSED_ENTRY(192),
+	ICE_PTT_UNUSED_ENTRY(193),
+	ICE_PTT_UNUSED_ENTRY(194),
+	ICE_PTT_UNUSED_ENTRY(195),
+	ICE_PTT_UNUSED_ENTRY(196),
+	ICE_PTT_UNUSED_ENTRY(197),
+	ICE_PTT_UNUSED_ENTRY(198),
+	ICE_PTT_UNUSED_ENTRY(199),
+
+	ICE_PTT_UNUSED_ENTRY(200),
+	ICE_PTT_UNUSED_ENTRY(201),
+	ICE_PTT_UNUSED_ENTRY(202),
+	ICE_PTT_UNUSED_ENTRY(203),
+	ICE_PTT_UNUSED_ENTRY(204),
+	ICE_PTT_UNUSED_ENTRY(205),
+	ICE_PTT_UNUSED_ENTRY(206),
+	ICE_PTT_UNUSED_ENTRY(207),
+	ICE_PTT_UNUSED_ENTRY(208),
+	ICE_PTT_UNUSED_ENTRY(209),
+
+	ICE_PTT_UNUSED_ENTRY(210),
+	ICE_PTT_UNUSED_ENTRY(211),
+	ICE_PTT_UNUSED_ENTRY(212),
+	ICE_PTT_UNUSED_ENTRY(213),
+	ICE_PTT_UNUSED_ENTRY(214),
+	ICE_PTT_UNUSED_ENTRY(215),
+	ICE_PTT_UNUSED_ENTRY(216),
+	ICE_PTT_UNUSED_ENTRY(217),
+	ICE_PTT_UNUSED_ENTRY(218),
+	ICE_PTT_UNUSED_ENTRY(219),
+
+	ICE_PTT_UNUSED_ENTRY(220),
+	ICE_PTT_UNUSED_ENTRY(221),
+	ICE_PTT_UNUSED_ENTRY(222),
+	ICE_PTT_UNUSED_ENTRY(223),
+	ICE_PTT_UNUSED_ENTRY(224),
+	ICE_PTT_UNUSED_ENTRY(225),
+	ICE_PTT_UNUSED_ENTRY(226),
+	ICE_PTT_UNUSED_ENTRY(227),
+	ICE_PTT_UNUSED_ENTRY(228),
+	ICE_PTT_UNUSED_ENTRY(229),
+
+	ICE_PTT_UNUSED_ENTRY(230),
+	ICE_PTT_UNUSED_ENTRY(231),
+	ICE_PTT_UNUSED_ENTRY(232),
+	ICE_PTT_UNUSED_ENTRY(233),
+	ICE_PTT_UNUSED_ENTRY(234),
+	ICE_PTT_UNUSED_ENTRY(235),
+	ICE_PTT_UNUSED_ENTRY(236),
+	ICE_PTT_UNUSED_ENTRY(237),
+	ICE_PTT_UNUSED_ENTRY(238),
+	ICE_PTT_UNUSED_ENTRY(239),
+
+	ICE_PTT_UNUSED_ENTRY(240),
+	ICE_PTT_UNUSED_ENTRY(241),
+	ICE_PTT_UNUSED_ENTRY(242),
+	ICE_PTT_UNUSED_ENTRY(243),
+	ICE_PTT_UNUSED_ENTRY(244),
+	ICE_PTT_UNUSED_ENTRY(245),
+	ICE_PTT_UNUSED_ENTRY(246),
+	ICE_PTT_UNUSED_ENTRY(247),
+	ICE_PTT_UNUSED_ENTRY(248),
+	ICE_PTT_UNUSED_ENTRY(249),
+
+	ICE_PTT_UNUSED_ENTRY(250),
+	ICE_PTT_UNUSED_ENTRY(251),
+	ICE_PTT_UNUSED_ENTRY(252),
+	ICE_PTT_UNUSED_ENTRY(253),
+	ICE_PTT_UNUSED_ENTRY(254),
+	ICE_PTT_UNUSED_ENTRY(255),
+};
+
+static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype)
+{
+	return ice_ptype_lkup[ptype];
+}
+
+#define ICE_LINK_SPEED_UNKNOWN		0
+#define ICE_LINK_SPEED_10MBPS		10
+#define ICE_LINK_SPEED_100MBPS		100
+#define ICE_LINK_SPEED_1000MBPS		1000
+#define ICE_LINK_SPEED_2500MBPS		2500
+#define ICE_LINK_SPEED_5000MBPS		5000
+#define ICE_LINK_SPEED_10000MBPS	10000
+#define ICE_LINK_SPEED_20000MBPS	20000
+#define ICE_LINK_SPEED_25000MBPS	25000
+#define ICE_LINK_SPEED_40000MBPS	40000
+#define ICE_LINK_SPEED_50000MBPS	50000
+#define ICE_LINK_SPEED_100000MBPS	100000
+
+#endif /* _ICE_LAN_TX_RX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
new file mode 100644
index 000000000..e99c8c10b
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -0,0 +1,3284 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_flow.h"
+#include "ice_lib.h"
+#include "ice_fltr.h"
+#include "ice_dcb_lib.h"
+#include "ice_devlink.h"
+
+/**
+ * ice_vsi_type_str - maps VSI type enum to string equivalents
+ * @vsi_type: VSI type enum
+ */
+const char *ice_vsi_type_str(enum ice_vsi_type vsi_type)
+{
+	switch (vsi_type) {
+	case ICE_VSI_PF:
+		return "ICE_VSI_PF";
+	case ICE_VSI_VF:
+		return "ICE_VSI_VF";
+	case ICE_VSI_CTRL:
+		return "ICE_VSI_CTRL";
+	case ICE_VSI_LB:
+		return "ICE_VSI_LB";
+	default:
+		return "unknown";
+	}
+}
+
+/**
+ * ice_vsi_ctrl_all_rx_rings - Start or stop a VSI's Rx rings
+ * @vsi: the VSI being configured
+ * @ena: start or stop the Rx rings
+ *
+ * First enable/disable all of the Rx rings, flush any remaining writes, and
+ * then verify that they have all been enabled/disabled successfully. This will
+ * let all of the register writes complete when enabling/disabling the Rx rings
+ * before waiting for the change in hardware to complete.
+ */
+static int ice_vsi_ctrl_all_rx_rings(struct ice_vsi *vsi, bool ena)
+{
+	int ret = 0;
+	u16 i;
+
+	for (i = 0; i < vsi->num_rxq; i++)
+		ice_vsi_ctrl_one_rx_ring(vsi, ena, i, false);
+
+	ice_flush(&vsi->back->hw);
+
+	for (i = 0; i < vsi->num_rxq; i++) {
+		ret = ice_vsi_wait_one_rx_ring(vsi, ena, i);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/**
+ * ice_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the VSI
+ * @vsi: VSI pointer
+ *
+ * On error: returns error code (negative)
+ * On success: returns 0
+ */
+static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+
+	/* allocate memory for both Tx and Rx ring pointers */
+	vsi->tx_rings = devm_kcalloc(dev, vsi->alloc_txq,
+				     sizeof(*vsi->tx_rings), GFP_KERNEL);
+	if (!vsi->tx_rings)
+		return -ENOMEM;
+
+	vsi->rx_rings = devm_kcalloc(dev, vsi->alloc_rxq,
+				     sizeof(*vsi->rx_rings), GFP_KERNEL);
+	if (!vsi->rx_rings)
+		goto err_rings;
+
+	/* txq_map needs to have enough space to track both Tx (stack) rings
+	 * and XDP rings; at this point vsi->num_xdp_txq might not be set,
+	 * so use num_possible_cpus() as we want to always provide XDP ring
+	 * per CPU, regardless of queue count settings from user that might
+	 * have come from ethtool's set_channels() callback;
+	 */
+	vsi->txq_map = devm_kcalloc(dev, (vsi->alloc_txq + num_possible_cpus()),
+				    sizeof(*vsi->txq_map), GFP_KERNEL);
+
+	if (!vsi->txq_map)
+		goto err_txq_map;
+
+	vsi->rxq_map = devm_kcalloc(dev, vsi->alloc_rxq,
+				    sizeof(*vsi->rxq_map), GFP_KERNEL);
+	if (!vsi->rxq_map)
+		goto err_rxq_map;
+
+	/* There is no need to allocate q_vectors for a loopback VSI. */
+	if (vsi->type == ICE_VSI_LB)
+		return 0;
+
+	/* allocate memory for q_vector pointers */
+	vsi->q_vectors = devm_kcalloc(dev, vsi->num_q_vectors,
+				      sizeof(*vsi->q_vectors), GFP_KERNEL);
+	if (!vsi->q_vectors)
+		goto err_vectors;
+
+	return 0;
+
+err_vectors:
+	devm_kfree(dev, vsi->rxq_map);
+err_rxq_map:
+	devm_kfree(dev, vsi->txq_map);
+err_txq_map:
+	devm_kfree(dev, vsi->rx_rings);
+err_rings:
+	devm_kfree(dev, vsi->tx_rings);
+	return -ENOMEM;
+}
+
+/**
+ * ice_vsi_set_num_desc - Set number of descriptors for queues on this VSI
+ * @vsi: the VSI being configured
+ */
+static void ice_vsi_set_num_desc(struct ice_vsi *vsi)
+{
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+	case ICE_VSI_CTRL:
+	case ICE_VSI_LB:
+		/* a user could change the values of num_[tr]x_desc using
+		 * ethtool -G so we should keep those values instead of
+		 * overwriting them with the defaults.
+		 */
+		if (!vsi->num_rx_desc)
+			vsi->num_rx_desc = ICE_DFLT_NUM_RX_DESC;
+		if (!vsi->num_tx_desc)
+			vsi->num_tx_desc = ICE_DFLT_NUM_TX_DESC;
+		break;
+	default:
+		dev_dbg(ice_pf_to_dev(vsi->back), "Not setting number of Tx/Rx descriptors for VSI type %d\n",
+			vsi->type);
+		break;
+	}
+}
+
+/**
+ * ice_vsi_set_num_qs - Set number of queues, descriptors and vectors for a VSI
+ * @vsi: the VSI being configured
+ * @vf_id: ID of the VF being configured
+ *
+ * Return 0 on success and a negative value on error
+ */
+static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_vf *vf = NULL;
+
+	if (vsi->type == ICE_VSI_VF)
+		vsi->vf_id = vf_id;
+
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		vsi->alloc_txq = min3(pf->num_lan_msix,
+				      ice_get_avail_txq_count(pf),
+				      (u16)num_online_cpus());
+		if (vsi->req_txq) {
+			vsi->alloc_txq = vsi->req_txq;
+			vsi->num_txq = vsi->req_txq;
+		}
+
+		pf->num_lan_tx = vsi->alloc_txq;
+
+		/* only 1 Rx queue unless RSS is enabled */
+		if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+			vsi->alloc_rxq = 1;
+		} else {
+			vsi->alloc_rxq = min3(pf->num_lan_msix,
+					      ice_get_avail_rxq_count(pf),
+					      (u16)num_online_cpus());
+			if (vsi->req_rxq) {
+				vsi->alloc_rxq = vsi->req_rxq;
+				vsi->num_rxq = vsi->req_rxq;
+			}
+		}
+
+		pf->num_lan_rx = vsi->alloc_rxq;
+
+		vsi->num_q_vectors = min_t(int, pf->num_lan_msix,
+					   max_t(int, vsi->alloc_rxq,
+						 vsi->alloc_txq));
+		break;
+	case ICE_VSI_VF:
+		vf = &pf->vf[vsi->vf_id];
+		if (vf->num_req_qs)
+			vf->num_vf_qs = vf->num_req_qs;
+		vsi->alloc_txq = vf->num_vf_qs;
+		vsi->alloc_rxq = vf->num_vf_qs;
+		/* pf->num_msix_per_vf includes (VF miscellaneous vector +
+		 * data queue interrupts). Since vsi->num_q_vectors is number
+		 * of queues vectors, subtract 1 (ICE_NONQ_VECS_VF) from the
+		 * original vector count
+		 */
+		vsi->num_q_vectors = pf->num_msix_per_vf - ICE_NONQ_VECS_VF;
+		break;
+	case ICE_VSI_CTRL:
+		vsi->alloc_txq = 1;
+		vsi->alloc_rxq = 1;
+		vsi->num_q_vectors = 1;
+		break;
+	case ICE_VSI_LB:
+		vsi->alloc_txq = 1;
+		vsi->alloc_rxq = 1;
+		break;
+	default:
+		dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi->type);
+		break;
+	}
+
+	ice_vsi_set_num_desc(vsi);
+}
+
+/**
+ * ice_get_free_slot - get the next non-NULL location index in array
+ * @array: array to search
+ * @size: size of the array
+ * @curr: last known occupied index to be used as a search hint
+ *
+ * void * is being used to keep the functionality generic. This lets us use this
+ * function on any array of pointers.
+ */
+static int ice_get_free_slot(void *array, int size, int curr)
+{
+	int **tmp_array = (int **)array;
+	int next;
+
+	if (curr < (size - 1) && !tmp_array[curr + 1]) {
+		next = curr + 1;
+	} else {
+		int i = 0;
+
+		while ((i < size) && (tmp_array[i]))
+			i++;
+		if (i == size)
+			next = ICE_NO_VSI;
+		else
+			next = i;
+	}
+	return next;
+}
+
+/**
+ * ice_vsi_delete - delete a VSI from the switch
+ * @vsi: pointer to VSI being removed
+ */
+static void ice_vsi_delete(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_vsi_ctx *ctxt;
+	enum ice_status status;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return;
+
+	if (vsi->type == ICE_VSI_VF)
+		ctxt->vf_num = vsi->vf_id;
+	ctxt->vsi_num = vsi->vsi_num;
+
+	memcpy(&ctxt->info, &vsi->info, sizeof(ctxt->info));
+
+	status = ice_free_vsi(&pf->hw, vsi->idx, ctxt, false, NULL);
+	if (status)
+		dev_err(ice_pf_to_dev(pf), "Failed to delete VSI %i in FW - error: %s\n",
+			vsi->vsi_num, ice_stat_str(status));
+
+	kfree(ctxt);
+}
+
+/**
+ * ice_vsi_free_arrays - De-allocate queue and vector pointer arrays for the VSI
+ * @vsi: pointer to VSI being cleared
+ */
+static void ice_vsi_free_arrays(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+
+	/* free the ring and vector containers */
+	if (vsi->q_vectors) {
+		devm_kfree(dev, vsi->q_vectors);
+		vsi->q_vectors = NULL;
+	}
+	if (vsi->tx_rings) {
+		devm_kfree(dev, vsi->tx_rings);
+		vsi->tx_rings = NULL;
+	}
+	if (vsi->rx_rings) {
+		devm_kfree(dev, vsi->rx_rings);
+		vsi->rx_rings = NULL;
+	}
+	if (vsi->txq_map) {
+		devm_kfree(dev, vsi->txq_map);
+		vsi->txq_map = NULL;
+	}
+	if (vsi->rxq_map) {
+		devm_kfree(dev, vsi->rxq_map);
+		vsi->rxq_map = NULL;
+	}
+}
+
+/**
+ * ice_vsi_clear - clean up and deallocate the provided VSI
+ * @vsi: pointer to VSI being cleared
+ *
+ * This deallocates the VSI's queue resources, removes it from the PF's
+ * VSI array if necessary, and deallocates the VSI
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_vsi_clear(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = NULL;
+	struct device *dev;
+
+	if (!vsi)
+		return 0;
+
+	if (!vsi->back)
+		return -EINVAL;
+
+	pf = vsi->back;
+	dev = ice_pf_to_dev(pf);
+
+	if (!pf->vsi[vsi->idx] || pf->vsi[vsi->idx] != vsi) {
+		dev_dbg(dev, "vsi does not exist at pf->vsi[%d]\n", vsi->idx);
+		return -EINVAL;
+	}
+
+	mutex_lock(&pf->sw_mutex);
+	/* updates the PF for this cleared VSI */
+
+	pf->vsi[vsi->idx] = NULL;
+	if (vsi->idx < pf->next_vsi && vsi->type != ICE_VSI_CTRL)
+		pf->next_vsi = vsi->idx;
+
+	ice_vsi_free_arrays(vsi);
+	mutex_unlock(&pf->sw_mutex);
+	devm_kfree(dev, vsi);
+
+	return 0;
+}
+
+/**
+ * ice_msix_clean_ctrl_vsi - MSIX mode interrupt handler for ctrl VSI
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ */
+static irqreturn_t ice_msix_clean_ctrl_vsi(int __always_unused irq, void *data)
+{
+	struct ice_q_vector *q_vector = (struct ice_q_vector *)data;
+
+	if (!q_vector->tx.ring)
+		return IRQ_HANDLED;
+
+#define FDIR_RX_DESC_CLEAN_BUDGET 64
+	ice_clean_rx_irq(q_vector->rx.ring, FDIR_RX_DESC_CLEAN_BUDGET);
+	ice_clean_ctrl_tx_irq(q_vector->tx.ring);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ice_msix_clean_rings - MSIX mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ */
+static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data)
+{
+	struct ice_q_vector *q_vector = (struct ice_q_vector *)data;
+
+	if (!q_vector->tx.ring && !q_vector->rx.ring)
+		return IRQ_HANDLED;
+
+	napi_schedule(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ice_vsi_alloc - Allocates the next available struct VSI in the PF
+ * @pf: board private structure
+ * @vsi_type: type of VSI
+ * @vf_id: ID of the VF being configured
+ *
+ * returns a pointer to a VSI on success, NULL on failure.
+ */
+static struct ice_vsi *
+ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_vsi *vsi = NULL;
+
+	/* Need to protect the allocation of the VSIs at the PF level */
+	mutex_lock(&pf->sw_mutex);
+
+	/* If we have already allocated our maximum number of VSIs,
+	 * pf->next_vsi will be ICE_NO_VSI. If not, pf->next_vsi index
+	 * is available to be populated
+	 */
+	if (pf->next_vsi == ICE_NO_VSI) {
+		dev_dbg(dev, "out of VSI slots!\n");
+		goto unlock_pf;
+	}
+
+	vsi = devm_kzalloc(dev, sizeof(*vsi), GFP_KERNEL);
+	if (!vsi)
+		goto unlock_pf;
+
+	vsi->type = vsi_type;
+	vsi->back = pf;
+	set_bit(__ICE_DOWN, vsi->state);
+
+	if (vsi_type == ICE_VSI_VF)
+		ice_vsi_set_num_qs(vsi, vf_id);
+	else
+		ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID);
+
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		if (ice_vsi_alloc_arrays(vsi))
+			goto err_rings;
+
+		/* Setup default MSIX irq handler for VSI */
+		vsi->irq_handler = ice_msix_clean_rings;
+		break;
+	case ICE_VSI_CTRL:
+		if (ice_vsi_alloc_arrays(vsi))
+			goto err_rings;
+
+		/* Setup ctrl VSI MSIX irq handler */
+		vsi->irq_handler = ice_msix_clean_ctrl_vsi;
+		break;
+	case ICE_VSI_VF:
+		if (ice_vsi_alloc_arrays(vsi))
+			goto err_rings;
+		break;
+	case ICE_VSI_LB:
+		if (ice_vsi_alloc_arrays(vsi))
+			goto err_rings;
+		break;
+	default:
+		dev_warn(dev, "Unknown VSI type %d\n", vsi->type);
+		goto unlock_pf;
+	}
+
+	if (vsi->type == ICE_VSI_CTRL) {
+		/* Use the last VSI slot as the index for the control VSI */
+		vsi->idx = pf->num_alloc_vsi - 1;
+		pf->ctrl_vsi_idx = vsi->idx;
+		pf->vsi[vsi->idx] = vsi;
+	} else {
+		/* fill slot and make note of the index */
+		vsi->idx = pf->next_vsi;
+		pf->vsi[pf->next_vsi] = vsi;
+
+		/* prepare pf->next_vsi for next use */
+		pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi,
+						 pf->next_vsi);
+	}
+	goto unlock_pf;
+
+err_rings:
+	devm_kfree(dev, vsi);
+	vsi = NULL;
+unlock_pf:
+	mutex_unlock(&pf->sw_mutex);
+	return vsi;
+}
+
+/**
+ * ice_alloc_fd_res - Allocate FD resource for a VSI
+ * @vsi: pointer to the ice_vsi
+ *
+ * This allocates the FD resources
+ *
+ * Returns 0 on success, -EPERM on no-op or -EIO on failure
+ */
+static int ice_alloc_fd_res(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	u32 g_val, b_val;
+
+	/* Flow Director filters are only allocated/assigned to the PF VSI which
+	 * passes the traffic. The CTRL VSI is only used to add/delete filters
+	 * so we don't allocate resources to it
+	 */
+
+	/* FD filters from guaranteed pool per VSI */
+	g_val = pf->hw.func_caps.fd_fltr_guar;
+	if (!g_val)
+		return -EPERM;
+
+	/* FD filters from best effort pool */
+	b_val = pf->hw.func_caps.fd_fltr_best_effort;
+	if (!b_val)
+		return -EPERM;
+
+	if (vsi->type != ICE_VSI_PF)
+		return -EPERM;
+
+	if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
+		return -EPERM;
+
+	vsi->num_gfltr = g_val / pf->num_alloc_vsi;
+
+	/* each VSI gets same "best_effort" quota */
+	vsi->num_bfltr = b_val;
+
+	return 0;
+}
+
+/**
+ * ice_vsi_get_qs - Assign queues from PF to VSI
+ * @vsi: the VSI to assign queues to
+ *
+ * Returns 0 on success and a negative value on error
+ */
+static int ice_vsi_get_qs(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_qs_cfg tx_qs_cfg = {
+		.qs_mutex = &pf->avail_q_mutex,
+		.pf_map = pf->avail_txqs,
+		.pf_map_size = pf->max_pf_txqs,
+		.q_count = vsi->alloc_txq,
+		.scatter_count = ICE_MAX_SCATTER_TXQS,
+		.vsi_map = vsi->txq_map,
+		.vsi_map_offset = 0,
+		.mapping_mode = ICE_VSI_MAP_CONTIG
+	};
+	struct ice_qs_cfg rx_qs_cfg = {
+		.qs_mutex = &pf->avail_q_mutex,
+		.pf_map = pf->avail_rxqs,
+		.pf_map_size = pf->max_pf_rxqs,
+		.q_count = vsi->alloc_rxq,
+		.scatter_count = ICE_MAX_SCATTER_RXQS,
+		.vsi_map = vsi->rxq_map,
+		.vsi_map_offset = 0,
+		.mapping_mode = ICE_VSI_MAP_CONTIG
+	};
+	int ret;
+
+	ret = __ice_vsi_get_qs(&tx_qs_cfg);
+	if (ret)
+		return ret;
+	vsi->tx_mapping_mode = tx_qs_cfg.mapping_mode;
+
+	ret = __ice_vsi_get_qs(&rx_qs_cfg);
+	if (ret)
+		return ret;
+	vsi->rx_mapping_mode = rx_qs_cfg.mapping_mode;
+
+	return 0;
+}
+
+/**
+ * ice_vsi_put_qs - Release queues from VSI to PF
+ * @vsi: the VSI that is going to release queues
+ */
+static void ice_vsi_put_qs(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int i;
+
+	mutex_lock(&pf->avail_q_mutex);
+
+	for (i = 0; i < vsi->alloc_txq; i++) {
+		clear_bit(vsi->txq_map[i], pf->avail_txqs);
+		vsi->txq_map[i] = ICE_INVAL_Q_INDEX;
+	}
+
+	for (i = 0; i < vsi->alloc_rxq; i++) {
+		clear_bit(vsi->rxq_map[i], pf->avail_rxqs);
+		vsi->rxq_map[i] = ICE_INVAL_Q_INDEX;
+	}
+
+	mutex_unlock(&pf->avail_q_mutex);
+}
+
+/**
+ * ice_is_safe_mode
+ * @pf: pointer to the PF struct
+ *
+ * returns true if driver is in safe mode, false otherwise
+ */
+bool ice_is_safe_mode(struct ice_pf *pf)
+{
+	return !test_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
+}
+
+/**
+ * ice_vsi_clean_rss_flow_fld - Delete RSS configuration
+ * @vsi: the VSI being cleaned up
+ *
+ * This function deletes RSS input set for all flows that were configured
+ * for this VSI
+ */
+static void ice_vsi_clean_rss_flow_fld(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	enum ice_status status;
+
+	if (ice_is_safe_mode(pf))
+		return;
+
+	status = ice_rem_vsi_rss_cfg(&pf->hw, vsi->idx);
+	if (status)
+		dev_dbg(ice_pf_to_dev(pf), "ice_rem_vsi_rss_cfg failed for vsi = %d, error = %s\n",
+			vsi->vsi_num, ice_stat_str(status));
+}
+
+/**
+ * ice_rss_clean - Delete RSS related VSI structures and configuration
+ * @vsi: the VSI being removed
+ */
+static void ice_rss_clean(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+
+	if (vsi->rss_hkey_user)
+		devm_kfree(dev, vsi->rss_hkey_user);
+	if (vsi->rss_lut_user)
+		devm_kfree(dev, vsi->rss_lut_user);
+
+	ice_vsi_clean_rss_flow_fld(vsi);
+	/* remove RSS replay list */
+	if (!ice_is_safe_mode(pf))
+		ice_rem_vsi_rss_list(&pf->hw, vsi->idx);
+}
+
+/**
+ * ice_vsi_set_rss_params - Setup RSS capabilities per VSI type
+ * @vsi: the VSI being configured
+ */
+static void ice_vsi_set_rss_params(struct ice_vsi *vsi)
+{
+	struct ice_hw_common_caps *cap;
+	struct ice_pf *pf = vsi->back;
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+		vsi->rss_size = 1;
+		return;
+	}
+
+	cap = &pf->hw.func_caps.common_cap;
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		/* PF VSI will inherit RSS instance of PF */
+		vsi->rss_table_size = (u16)cap->rss_table_size;
+		vsi->rss_size = min_t(u16, num_online_cpus(),
+				      BIT(cap->rss_table_entry_width));
+		vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF;
+		break;
+	case ICE_VSI_VF:
+		/* VF VSI will get a small RSS table.
+		 * For VSI_LUT, LUT size should be set to 64 bytes.
+		 */
+		vsi->rss_table_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
+		vsi->rss_size = ICE_MAX_RSS_QS_PER_VF;
+		vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI;
+		break;
+	case ICE_VSI_LB:
+		break;
+	default:
+		dev_dbg(ice_pf_to_dev(pf), "Unsupported VSI type %s\n",
+			ice_vsi_type_str(vsi->type));
+		break;
+	}
+}
+
+/**
+ * ice_set_dflt_vsi_ctx - Set default VSI context before adding a VSI
+ * @ctxt: the VSI context being set
+ *
+ * This initializes a default VSI context for all sections except the Queues.
+ */
+static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt)
+{
+	u32 table = 0;
+
+	memset(&ctxt->info, 0, sizeof(ctxt->info));
+	/* VSI's should be allocated from shared pool */
+	ctxt->alloc_from_pool = true;
+	/* Src pruning enabled by default */
+	ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE;
+	/* Traffic from VSI can be sent to LAN */
+	ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
+	/* By default bits 3 and 4 in vlan_flags are 0's which results in legacy
+	 * behavior (show VLAN, DEI, and UP) in descriptor. Also, allow all
+	 * packets untagged/tagged.
+	 */
+	ctxt->info.vlan_flags = ((ICE_AQ_VSI_VLAN_MODE_ALL &
+				  ICE_AQ_VSI_VLAN_MODE_M) >>
+				 ICE_AQ_VSI_VLAN_MODE_S);
+	/* Have 1:1 UP mapping for both ingress/egress tables */
+	table |= ICE_UP_TABLE_TRANSLATE(0, 0);
+	table |= ICE_UP_TABLE_TRANSLATE(1, 1);
+	table |= ICE_UP_TABLE_TRANSLATE(2, 2);
+	table |= ICE_UP_TABLE_TRANSLATE(3, 3);
+	table |= ICE_UP_TABLE_TRANSLATE(4, 4);
+	table |= ICE_UP_TABLE_TRANSLATE(5, 5);
+	table |= ICE_UP_TABLE_TRANSLATE(6, 6);
+	table |= ICE_UP_TABLE_TRANSLATE(7, 7);
+	ctxt->info.ingress_table = cpu_to_le32(table);
+	ctxt->info.egress_table = cpu_to_le32(table);
+	/* Have 1:1 UP mapping for outer to inner UP table */
+	ctxt->info.outer_up_table = cpu_to_le32(table);
+	/* No Outer tag support outer_tag_flags remains to zero */
+}
+
+/**
+ * ice_vsi_setup_q_map - Setup a VSI queue map
+ * @vsi: the VSI being configured
+ * @ctxt: VSI context structure
+ */
+static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
+{
+	u16 offset = 0, qmap = 0, tx_count = 0;
+	u16 qcount_tx = vsi->alloc_txq;
+	u16 qcount_rx = vsi->alloc_rxq;
+	u16 tx_numq_tc, rx_numq_tc;
+	u16 pow = 0, max_rss = 0;
+	bool ena_tc0 = false;
+	u8 netdev_tc = 0;
+	int i;
+
+	/* at least TC0 should be enabled by default */
+	if (vsi->tc_cfg.numtc) {
+		if (!(vsi->tc_cfg.ena_tc & BIT(0)))
+			ena_tc0 = true;
+	} else {
+		ena_tc0 = true;
+	}
+
+	if (ena_tc0) {
+		vsi->tc_cfg.numtc++;
+		vsi->tc_cfg.ena_tc |= 1;
+	}
+
+	rx_numq_tc = qcount_rx / vsi->tc_cfg.numtc;
+	if (!rx_numq_tc)
+		rx_numq_tc = 1;
+	tx_numq_tc = qcount_tx / vsi->tc_cfg.numtc;
+	if (!tx_numq_tc)
+		tx_numq_tc = 1;
+
+	/* TC mapping is a function of the number of Rx queues assigned to the
+	 * VSI for each traffic class and the offset of these queues.
+	 * The first 10 bits are for queue offset for TC0, next 4 bits for no:of
+	 * queues allocated to TC0. No:of queues is a power-of-2.
+	 *
+	 * If TC is not enabled, the queue offset is set to 0, and allocate one
+	 * queue, this way, traffic for the given TC will be sent to the default
+	 * queue.
+	 *
+	 * Setup number and offset of Rx queues for all TCs for the VSI
+	 */
+
+	qcount_rx = rx_numq_tc;
+
+	/* qcount will change if RSS is enabled */
+	if (test_bit(ICE_FLAG_RSS_ENA, vsi->back->flags)) {
+		if (vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_VF) {
+			if (vsi->type == ICE_VSI_PF)
+				max_rss = ICE_MAX_LG_RSS_QS;
+			else
+				max_rss = ICE_MAX_RSS_QS_PER_VF;
+			qcount_rx = min_t(u16, rx_numq_tc, max_rss);
+			if (!vsi->req_rxq)
+				qcount_rx = min_t(u16, qcount_rx,
+						  vsi->rss_size);
+		}
+	}
+
+	/* find the (rounded up) power-of-2 of qcount */
+	pow = (u16)order_base_2(qcount_rx);
+
+	ice_for_each_traffic_class(i) {
+		if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
+			/* TC is not enabled */
+			vsi->tc_cfg.tc_info[i].qoffset = 0;
+			vsi->tc_cfg.tc_info[i].qcount_rx = 1;
+			vsi->tc_cfg.tc_info[i].qcount_tx = 1;
+			vsi->tc_cfg.tc_info[i].netdev_tc = 0;
+			ctxt->info.tc_mapping[i] = 0;
+			continue;
+		}
+
+		/* TC is enabled */
+		vsi->tc_cfg.tc_info[i].qoffset = offset;
+		vsi->tc_cfg.tc_info[i].qcount_rx = qcount_rx;
+		vsi->tc_cfg.tc_info[i].qcount_tx = tx_numq_tc;
+		vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++;
+
+		qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
+			ICE_AQ_VSI_TC_Q_OFFSET_M) |
+			((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
+			 ICE_AQ_VSI_TC_Q_NUM_M);
+		offset += qcount_rx;
+		tx_count += tx_numq_tc;
+		ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
+	}
+
+	/* if offset is non-zero, means it is calculated correctly based on
+	 * enabled TCs for a given VSI otherwise qcount_rx will always
+	 * be correct and non-zero because it is based off - VSI's
+	 * allocated Rx queues which is at least 1 (hence qcount_tx will be
+	 * at least 1)
+	 */
+	if (offset)
+		vsi->num_rxq = offset;
+	else
+		vsi->num_rxq = qcount_rx;
+
+	vsi->num_txq = tx_count;
+
+	if (vsi->type == ICE_VSI_VF && vsi->num_txq != vsi->num_rxq) {
+		dev_dbg(ice_pf_to_dev(vsi->back), "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n");
+		/* since there is a chance that num_rxq could have been changed
+		 * in the above for loop, make num_txq equal to num_rxq.
+		 */
+		vsi->num_txq = vsi->num_rxq;
+	}
+
+	/* Rx queue mapping */
+	ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG);
+	/* q_mapping buffer holds the info for the first queue allocated for
+	 * this VSI in the PF space and also the number of queues associated
+	 * with this VSI.
+	 */
+	ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]);
+	ctxt->info.q_mapping[1] = cpu_to_le16(vsi->num_rxq);
+}
+
+/**
+ * ice_set_fd_vsi_ctx - Set FD VSI context before adding a VSI
+ * @ctxt: the VSI context being set
+ * @vsi: the VSI being configured
+ */
+static void ice_set_fd_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
+{
+	u8 dflt_q_group, dflt_q_prio;
+	u16 dflt_q, report_q, val;
+
+	if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_CTRL)
+		return;
+
+	val = ICE_AQ_VSI_PROP_FLOW_DIR_VALID;
+	ctxt->info.valid_sections |= cpu_to_le16(val);
+	dflt_q = 0;
+	dflt_q_group = 0;
+	report_q = 0;
+	dflt_q_prio = 0;
+
+	/* enable flow director filtering/programming */
+	val = ICE_AQ_VSI_FD_ENABLE | ICE_AQ_VSI_FD_PROG_ENABLE;
+	ctxt->info.fd_options = cpu_to_le16(val);
+	/* max of allocated flow director filters */
+	ctxt->info.max_fd_fltr_dedicated =
+			cpu_to_le16(vsi->num_gfltr);
+	/* max of shared flow director filters any VSI may program */
+	ctxt->info.max_fd_fltr_shared =
+			cpu_to_le16(vsi->num_bfltr);
+	/* default queue index within the VSI of the default FD */
+	val = ((dflt_q << ICE_AQ_VSI_FD_DEF_Q_S) &
+	       ICE_AQ_VSI_FD_DEF_Q_M);
+	/* target queue or queue group to the FD filter */
+	val |= ((dflt_q_group << ICE_AQ_VSI_FD_DEF_GRP_S) &
+		ICE_AQ_VSI_FD_DEF_GRP_M);
+	ctxt->info.fd_def_q = cpu_to_le16(val);
+	/* queue index on which FD filter completion is reported */
+	val = ((report_q << ICE_AQ_VSI_FD_REPORT_Q_S) &
+	       ICE_AQ_VSI_FD_REPORT_Q_M);
+	/* priority of the default qindex action */
+	val |= ((dflt_q_prio << ICE_AQ_VSI_FD_DEF_PRIORITY_S) &
+		ICE_AQ_VSI_FD_DEF_PRIORITY_M);
+	ctxt->info.fd_report_opt = cpu_to_le16(val);
+}
+
+/**
+ * ice_set_rss_vsi_ctx - Set RSS VSI context before adding a VSI
+ * @ctxt: the VSI context being set
+ * @vsi: the VSI being configured
+ */
+static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
+{
+	u8 lut_type, hash_type;
+	struct device *dev;
+	struct ice_pf *pf;
+
+	pf = vsi->back;
+	dev = ice_pf_to_dev(pf);
+
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		/* PF VSI will inherit RSS instance of PF */
+		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF;
+		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
+		break;
+	case ICE_VSI_VF:
+		/* VF VSI will gets a small RSS table which is a VSI LUT type */
+		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
+		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
+		break;
+	default:
+		dev_dbg(dev, "Unsupported VSI type %s\n",
+			ice_vsi_type_str(vsi->type));
+		return;
+	}
+
+	ctxt->info.q_opt_rss = ((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
+				ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
+				(hash_type & ICE_AQ_VSI_Q_OPT_RSS_HASH_M);
+}
+
+/**
+ * ice_vsi_init - Create and initialize a VSI
+ * @vsi: the VSI being configured
+ * @init_vsi: is this call creating a VSI
+ *
+ * This initializes a VSI context depending on the VSI type to be added and
+ * passes it down to the add_vsi aq command to create a new VSI.
+ */
+static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_vsi_ctx *ctxt;
+	struct device *dev;
+	int ret = 0;
+
+	dev = ice_pf_to_dev(pf);
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	switch (vsi->type) {
+	case ICE_VSI_CTRL:
+	case ICE_VSI_LB:
+	case ICE_VSI_PF:
+		ctxt->flags = ICE_AQ_VSI_TYPE_PF;
+		break;
+	case ICE_VSI_VF:
+		ctxt->flags = ICE_AQ_VSI_TYPE_VF;
+		/* VF number here is the absolute VF number (0-255) */
+		ctxt->vf_num = vsi->vf_id + hw->func_caps.vf_base_id;
+		break;
+	default:
+		ret = -ENODEV;
+		goto out;
+	}
+
+	ice_set_dflt_vsi_ctx(ctxt);
+	if (test_bit(ICE_FLAG_FD_ENA, pf->flags))
+		ice_set_fd_vsi_ctx(ctxt, vsi);
+	/* if the switch is in VEB mode, allow VSI loopback */
+	if (vsi->vsw->bridge_mode == BRIDGE_MODE_VEB)
+		ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
+
+	/* Set LUT type and HASH type if RSS is enabled */
+	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags) &&
+	    vsi->type != ICE_VSI_CTRL) {
+		ice_set_rss_vsi_ctx(ctxt, vsi);
+		/* if updating VSI context, make sure to set valid_section:
+		 * to indicate which section of VSI context being updated
+		 */
+		if (!init_vsi)
+			ctxt->info.valid_sections |=
+				cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
+	}
+
+	ctxt->info.sw_id = vsi->port_info->sw_id;
+	ice_vsi_setup_q_map(vsi, ctxt);
+	if (!init_vsi) /* means VSI being updated */
+		/* must to indicate which section of VSI context are
+		 * being modified
+		 */
+		ctxt->info.valid_sections |=
+			cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
+
+	/* enable/disable MAC and VLAN anti-spoof when spoofchk is on/off
+	 * respectively
+	 */
+	if (vsi->type == ICE_VSI_VF) {
+		ctxt->info.valid_sections |=
+			cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
+		if (pf->vf[vsi->vf_id].spoofchk) {
+			ctxt->info.sec_flags |=
+				ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF |
+				(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+				 ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+		} else {
+			ctxt->info.sec_flags &=
+				~(ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF |
+				  (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+				   ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S));
+		}
+	}
+
+	/* Allow control frames out of main VSI */
+	if (vsi->type == ICE_VSI_PF) {
+		ctxt->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD;
+		ctxt->info.valid_sections |=
+			cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
+	}
+
+	if (init_vsi) {
+		ret = ice_add_vsi(hw, vsi->idx, ctxt, NULL);
+		if (ret) {
+			dev_err(dev, "Add VSI failed, err %d\n", ret);
+			ret = -EIO;
+			goto out;
+		}
+	} else {
+		ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+		if (ret) {
+			dev_err(dev, "Update VSI failed, err %d\n", ret);
+			ret = -EIO;
+			goto out;
+		}
+	}
+
+	/* keep context for update VSI operations */
+	vsi->info = ctxt->info;
+
+	/* record VSI number returned */
+	vsi->vsi_num = ctxt->vsi_num;
+
+out:
+	kfree(ctxt);
+	return ret;
+}
+
+/**
+ * ice_free_res - free a block of resources
+ * @res: pointer to the resource
+ * @index: starting index previously returned by ice_get_res
+ * @id: identifier to track owner
+ *
+ * Returns number of resources freed
+ */
+int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id)
+{
+	int count = 0;
+	int i;
+
+	if (!res || index >= res->end)
+		return -EINVAL;
+
+	id |= ICE_RES_VALID_BIT;
+	for (i = index; i < res->end && res->list[i] == id; i++) {
+		res->list[i] = 0;
+		count++;
+	}
+
+	return count;
+}
+
+/**
+ * ice_search_res - Search the tracker for a block of resources
+ * @res: pointer to the resource
+ * @needed: size of the block needed
+ * @id: identifier to track owner
+ *
+ * Returns the base item index of the block, or -ENOMEM for error
+ */
+static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
+{
+	u16 start = 0, end = 0;
+
+	if (needed > res->end)
+		return -ENOMEM;
+
+	id |= ICE_RES_VALID_BIT;
+
+	do {
+		/* skip already allocated entries */
+		if (res->list[end++] & ICE_RES_VALID_BIT) {
+			start = end;
+			if ((start + needed) > res->end)
+				break;
+		}
+
+		if (end == (start + needed)) {
+			int i = start;
+
+			/* there was enough, so assign it to the requestor */
+			while (i != end)
+				res->list[i++] = id;
+
+			return start;
+		}
+	} while (end < res->end);
+
+	return -ENOMEM;
+}
+
+/**
+ * ice_get_free_res_count - Get free count from a resource tracker
+ * @res: Resource tracker instance
+ */
+static u16 ice_get_free_res_count(struct ice_res_tracker *res)
+{
+	u16 i, count = 0;
+
+	for (i = 0; i < res->end; i++)
+		if (!(res->list[i] & ICE_RES_VALID_BIT))
+			count++;
+
+	return count;
+}
+
+/**
+ * ice_get_res - get a block of resources
+ * @pf: board private structure
+ * @res: pointer to the resource
+ * @needed: size of the block needed
+ * @id: identifier to track owner
+ *
+ * Returns the base item index of the block, or negative for error
+ */
+int
+ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id)
+{
+	if (!res || !pf)
+		return -EINVAL;
+
+	if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) {
+		dev_err(ice_pf_to_dev(pf), "param err: needed=%d, num_entries = %d id=0x%04x\n",
+			needed, res->num_entries, id);
+		return -EINVAL;
+	}
+
+	return ice_search_res(res, needed, id);
+}
+
+/**
+ * ice_vsi_setup_vector_base - Set up the base vector for the given VSI
+ * @vsi: ptr to the VSI
+ *
+ * This should only be called after ice_vsi_alloc() which allocates the
+ * corresponding SW VSI structure and initializes num_queue_pairs for the
+ * newly allocated VSI.
+ *
+ * Returns 0 on success or negative on failure
+ */
+static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	u16 num_q_vectors;
+	int base;
+
+	dev = ice_pf_to_dev(pf);
+	/* SRIOV doesn't grab irq_tracker entries for each VSI */
+	if (vsi->type == ICE_VSI_VF)
+		return 0;
+
+	if (vsi->base_vector) {
+		dev_dbg(dev, "VSI %d has non-zero base vector %d\n",
+			vsi->vsi_num, vsi->base_vector);
+		return -EEXIST;
+	}
+
+	num_q_vectors = vsi->num_q_vectors;
+	/* reserve slots from OS requested IRQs */
+	base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, vsi->idx);
+
+	if (base < 0) {
+		dev_err(dev, "%d MSI-X interrupts available. %s %d failed to get %d MSI-X vectors\n",
+			ice_get_free_res_count(pf->irq_tracker),
+			ice_vsi_type_str(vsi->type), vsi->idx, num_q_vectors);
+		return -ENOENT;
+	}
+	vsi->base_vector = (u16)base;
+	pf->num_avail_sw_msix -= num_q_vectors;
+
+	return 0;
+}
+
+/**
+ * ice_vsi_clear_rings - Deallocates the Tx and Rx rings for VSI
+ * @vsi: the VSI having rings deallocated
+ */
+static void ice_vsi_clear_rings(struct ice_vsi *vsi)
+{
+	int i;
+
+	/* Avoid stale references by clearing map from vector to ring */
+	if (vsi->q_vectors) {
+		ice_for_each_q_vector(vsi, i) {
+			struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+			if (q_vector) {
+				q_vector->tx.ring = NULL;
+				q_vector->rx.ring = NULL;
+			}
+		}
+	}
+
+	if (vsi->tx_rings) {
+		for (i = 0; i < vsi->alloc_txq; i++) {
+			if (vsi->tx_rings[i]) {
+				kfree_rcu(vsi->tx_rings[i], rcu);
+				WRITE_ONCE(vsi->tx_rings[i], NULL);
+			}
+		}
+	}
+	if (vsi->rx_rings) {
+		for (i = 0; i < vsi->alloc_rxq; i++) {
+			if (vsi->rx_rings[i]) {
+				kfree_rcu(vsi->rx_rings[i], rcu);
+				WRITE_ONCE(vsi->rx_rings[i], NULL);
+			}
+		}
+	}
+}
+
+/**
+ * ice_vsi_alloc_rings - Allocates Tx and Rx rings for the VSI
+ * @vsi: VSI which is having rings allocated
+ */
+static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	u16 i;
+
+	dev = ice_pf_to_dev(pf);
+	/* Allocate Tx rings */
+	for (i = 0; i < vsi->alloc_txq; i++) {
+		struct ice_ring *ring;
+
+		/* allocate with kzalloc(), free with kfree_rcu() */
+		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+
+		if (!ring)
+			goto err_out;
+
+		ring->q_index = i;
+		ring->reg_idx = vsi->txq_map[i];
+		ring->ring_active = false;
+		ring->vsi = vsi;
+		ring->dev = dev;
+		ring->count = vsi->num_tx_desc;
+		ring->txq_teid = ICE_INVAL_TEID;
+		WRITE_ONCE(vsi->tx_rings[i], ring);
+	}
+
+	/* Allocate Rx rings */
+	for (i = 0; i < vsi->alloc_rxq; i++) {
+		struct ice_ring *ring;
+
+		/* allocate with kzalloc(), free with kfree_rcu() */
+		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+		if (!ring)
+			goto err_out;
+
+		ring->q_index = i;
+		ring->reg_idx = vsi->rxq_map[i];
+		ring->ring_active = false;
+		ring->vsi = vsi;
+		ring->netdev = vsi->netdev;
+		ring->dev = dev;
+		ring->count = vsi->num_rx_desc;
+		WRITE_ONCE(vsi->rx_rings[i], ring);
+	}
+
+	return 0;
+
+err_out:
+	ice_vsi_clear_rings(vsi);
+	return -ENOMEM;
+}
+
+/**
+ * ice_vsi_manage_rss_lut - disable/enable RSS
+ * @vsi: the VSI being changed
+ * @ena: boolean value indicating if this is an enable or disable request
+ *
+ * In the event of disable request for RSS, this function will zero out RSS
+ * LUT, while in the event of enable request for RSS, it will reconfigure RSS
+ * LUT.
+ */
+int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena)
+{
+	int err = 0;
+	u8 *lut;
+
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	if (ena) {
+		if (vsi->rss_lut_user)
+			memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+		else
+			ice_fill_rss_lut(lut, vsi->rss_table_size,
+					 vsi->rss_size);
+	}
+
+	err = ice_set_rss(vsi, NULL, lut, vsi->rss_table_size);
+	kfree(lut);
+	return err;
+}
+
+/**
+ * ice_vsi_cfg_rss_lut_key - Configure RSS params for a VSI
+ * @vsi: VSI to be configured
+ */
+static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
+{
+	struct ice_aqc_get_set_rss_keys *key;
+	struct ice_pf *pf = vsi->back;
+	enum ice_status status;
+	struct device *dev;
+	int err = 0;
+	u8 *lut;
+
+	dev = ice_pf_to_dev(pf);
+	vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq);
+
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	if (vsi->rss_lut_user)
+		memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+	else
+		ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
+
+	status = ice_aq_set_rss_lut(&pf->hw, vsi->idx, vsi->rss_lut_type, lut,
+				    vsi->rss_table_size);
+
+	if (status) {
+		dev_err(dev, "set_rss_lut failed, error %s\n",
+			ice_stat_str(status));
+		err = -EIO;
+		goto ice_vsi_cfg_rss_exit;
+	}
+
+	key = kzalloc(sizeof(*key), GFP_KERNEL);
+	if (!key) {
+		err = -ENOMEM;
+		goto ice_vsi_cfg_rss_exit;
+	}
+
+	if (vsi->rss_hkey_user)
+		memcpy(key,
+		       (struct ice_aqc_get_set_rss_keys *)vsi->rss_hkey_user,
+		       ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
+	else
+		netdev_rss_key_fill((void *)key,
+				    ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
+
+	status = ice_aq_set_rss_key(&pf->hw, vsi->idx, key);
+
+	if (status) {
+		dev_err(dev, "set_rss_key failed, error %s\n",
+			ice_stat_str(status));
+		err = -EIO;
+	}
+
+	kfree(key);
+ice_vsi_cfg_rss_exit:
+	kfree(lut);
+	return err;
+}
+
+/**
+ * ice_vsi_set_vf_rss_flow_fld - Sets VF VSI RSS input set for different flows
+ * @vsi: VSI to be configured
+ *
+ * This function will only be called during the VF VSI setup. Upon successful
+ * completion of package download, this function will configure default RSS
+ * input sets for VF VSI.
+ */
+static void ice_vsi_set_vf_rss_flow_fld(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	enum ice_status status;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+	if (ice_is_safe_mode(pf)) {
+		dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n",
+			vsi->vsi_num);
+		return;
+	}
+
+	status = ice_add_avf_rss_cfg(&pf->hw, vsi->idx, ICE_DEFAULT_RSS_HENA);
+	if (status)
+		dev_dbg(dev, "ice_add_avf_rss_cfg failed for vsi = %d, error = %s\n",
+			vsi->vsi_num, ice_stat_str(status));
+}
+
+/**
+ * ice_vsi_set_rss_flow_fld - Sets RSS input set for different flows
+ * @vsi: VSI to be configured
+ *
+ * This function will only be called after successful download package call
+ * during initialization of PF. Since the downloaded package will erase the
+ * RSS section, this function will configure RSS input sets for different
+ * flow types. The last profile added has the highest priority, therefore 2
+ * tuple profiles (i.e. IPv4 src/dst) are added before 4 tuple profiles
+ * (i.e. IPv4 src/dst TCP src/dst port).
+ */
+static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi)
+{
+	u16 vsi_handle = vsi->idx, vsi_num = vsi->vsi_num;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+	if (ice_is_safe_mode(pf)) {
+		dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n",
+			vsi_num);
+		return;
+	}
+	/* configure RSS for IPv4 with input set IP src/dst */
+	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV4,
+				 ICE_FLOW_SEG_HDR_IPV4);
+	if (status)
+		dev_dbg(dev, "ice_add_rss_cfg failed for ipv4 flow, vsi = %d, error = %s\n",
+			vsi_num, ice_stat_str(status));
+
+	/* configure RSS for IPv6 with input set IPv6 src/dst */
+	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV6,
+				 ICE_FLOW_SEG_HDR_IPV6);
+	if (status)
+		dev_dbg(dev, "ice_add_rss_cfg failed for ipv6 flow, vsi = %d, error = %s\n",
+			vsi_num, ice_stat_str(status));
+
+	/* configure RSS for tcp4 with input set IP src/dst, TCP src/dst */
+	status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_TCP_IPV4,
+				 ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV4);
+	if (status)
+		dev_dbg(dev, "ice_add_rss_cfg failed for tcp4 flow, vsi = %d, error = %s\n",
+			vsi_num, ice_stat_str(status));
+
+	/* configure RSS for udp4 with input set IP src/dst, UDP src/dst */
+	status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_UDP_IPV4,
+				 ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV4);
+	if (status)
+		dev_dbg(dev, "ice_add_rss_cfg failed for udp4 flow, vsi = %d, error = %s\n",
+			vsi_num, ice_stat_str(status));
+
+	/* configure RSS for sctp4 with input set IP src/dst */
+	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV4,
+				 ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV4);
+	if (status)
+		dev_dbg(dev, "ice_add_rss_cfg failed for sctp4 flow, vsi = %d, error = %s\n",
+			vsi_num, ice_stat_str(status));
+
+	/* configure RSS for tcp6 with input set IPv6 src/dst, TCP src/dst */
+	status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_TCP_IPV6,
+				 ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV6);
+	if (status)
+		dev_dbg(dev, "ice_add_rss_cfg failed for tcp6 flow, vsi = %d, error = %s\n",
+			vsi_num, ice_stat_str(status));
+
+	/* configure RSS for udp6 with input set IPv6 src/dst, UDP src/dst */
+	status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_UDP_IPV6,
+				 ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV6);
+	if (status)
+		dev_dbg(dev, "ice_add_rss_cfg failed for udp6 flow, vsi = %d, error = %s\n",
+			vsi_num, ice_stat_str(status));
+
+	/* configure RSS for sctp6 with input set IPv6 src/dst */
+	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV6,
+				 ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV6);
+	if (status)
+		dev_dbg(dev, "ice_add_rss_cfg failed for sctp6 flow, vsi = %d, error = %s\n",
+			vsi_num, ice_stat_str(status));
+}
+
+/**
+ * ice_pf_state_is_nominal - checks the PF for nominal state
+ * @pf: pointer to PF to check
+ *
+ * Check the PF's state for a collection of bits that would indicate
+ * the PF is in a state that would inhibit normal operation for
+ * driver functionality.
+ *
+ * Returns true if PF is in a nominal state, false otherwise
+ */
+bool ice_pf_state_is_nominal(struct ice_pf *pf)
+{
+	DECLARE_BITMAP(check_bits, __ICE_STATE_NBITS) = { 0 };
+
+	if (!pf)
+		return false;
+
+	bitmap_set(check_bits, 0, __ICE_STATE_NOMINAL_CHECK_BITS);
+	if (bitmap_intersects(pf->state, check_bits, __ICE_STATE_NBITS))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_update_eth_stats - Update VSI-specific ethernet statistics counters
+ * @vsi: the VSI to be updated
+ */
+void ice_update_eth_stats(struct ice_vsi *vsi)
+{
+	struct ice_eth_stats *prev_es, *cur_es;
+	struct ice_hw *hw = &vsi->back->hw;
+	u16 vsi_num = vsi->vsi_num;    /* HW absolute index of a VSI */
+
+	prev_es = &vsi->eth_stats_prev;
+	cur_es = &vsi->eth_stats;
+
+	ice_stat_update40(hw, GLV_GORCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->rx_bytes, &cur_es->rx_bytes);
+
+	ice_stat_update40(hw, GLV_UPRCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->rx_unicast, &cur_es->rx_unicast);
+
+	ice_stat_update40(hw, GLV_MPRCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->rx_multicast, &cur_es->rx_multicast);
+
+	ice_stat_update40(hw, GLV_BPRCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->rx_broadcast, &cur_es->rx_broadcast);
+
+	ice_stat_update32(hw, GLV_RDPC(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->rx_discards, &cur_es->rx_discards);
+
+	ice_stat_update40(hw, GLV_GOTCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->tx_bytes, &cur_es->tx_bytes);
+
+	ice_stat_update40(hw, GLV_UPTCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->tx_unicast, &cur_es->tx_unicast);
+
+	ice_stat_update40(hw, GLV_MPTCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->tx_multicast, &cur_es->tx_multicast);
+
+	ice_stat_update40(hw, GLV_BPTCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->tx_broadcast, &cur_es->tx_broadcast);
+
+	ice_stat_update32(hw, GLV_TEPC(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->tx_errors, &cur_es->tx_errors);
+
+	vsi->stat_offsets_loaded = true;
+}
+
+/**
+ * ice_vsi_add_vlan - Add VSI membership for given VLAN
+ * @vsi: the VSI being configured
+ * @vid: VLAN ID to be added
+ * @action: filter action to be performed on match
+ */
+int
+ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid, enum ice_sw_fwd_act_type action)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	int err = 0;
+
+	dev = ice_pf_to_dev(pf);
+
+	if (!ice_fltr_add_vlan(vsi, vid, action)) {
+		vsi->num_vlan++;
+	} else {
+		err = -ENODEV;
+		dev_err(dev, "Failure Adding VLAN %d on VSI %i\n", vid,
+			vsi->vsi_num);
+	}
+
+	return err;
+}
+
+/**
+ * ice_vsi_kill_vlan - Remove VSI membership for a given VLAN
+ * @vsi: the VSI being configured
+ * @vid: VLAN ID to be removed
+ *
+ * Returns 0 on success and negative on failure
+ */
+int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
+{
+	struct ice_pf *pf = vsi->back;
+	enum ice_status status;
+	struct device *dev;
+	int err = 0;
+
+	dev = ice_pf_to_dev(pf);
+
+	status = ice_fltr_remove_vlan(vsi, vid, ICE_FWD_TO_VSI);
+	if (!status) {
+		vsi->num_vlan--;
+	} else if (status == ICE_ERR_DOES_NOT_EXIST) {
+		dev_dbg(dev, "Failed to remove VLAN %d on VSI %i, it does not exist, status: %s\n",
+			vid, vsi->vsi_num, ice_stat_str(status));
+	} else {
+		dev_err(dev, "Error removing VLAN %d on vsi %i error: %s\n",
+			vid, vsi->vsi_num, ice_stat_str(status));
+		err = -EIO;
+	}
+
+	return err;
+}
+
+/**
+ * ice_vsi_cfg_frame_size - setup max frame size and Rx buffer length
+ * @vsi: VSI
+ */
+void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
+{
+	if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) {
+		vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
+		vsi->rx_buf_len = ICE_RXBUF_2048;
+#if (PAGE_SIZE < 8192)
+	} else if (!ICE_2K_TOO_SMALL_WITH_PADDING &&
+		   (vsi->netdev->mtu <= ETH_DATA_LEN)) {
+		vsi->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN;
+		vsi->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN;
+#endif
+	} else {
+		vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
+#if (PAGE_SIZE < 8192)
+		vsi->rx_buf_len = ICE_RXBUF_3072;
+#else
+		vsi->rx_buf_len = ICE_RXBUF_2048;
+#endif
+	}
+}
+
+/**
+ * ice_write_qrxflxp_cntxt - write/configure QRXFLXP_CNTXT register
+ * @hw: HW pointer
+ * @pf_q: index of the Rx queue in the PF's queue space
+ * @rxdid: flexible descriptor RXDID
+ * @prio: priority for the RXDID for this queue
+ */
+void
+ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio)
+{
+	int regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
+
+	/* clear any previous values */
+	regval &= ~(QRXFLXP_CNTXT_RXDID_IDX_M |
+		    QRXFLXP_CNTXT_RXDID_PRIO_M |
+		    QRXFLXP_CNTXT_TS_M);
+
+	regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
+		QRXFLXP_CNTXT_RXDID_IDX_M;
+
+	regval |= (prio << QRXFLXP_CNTXT_RXDID_PRIO_S) &
+		QRXFLXP_CNTXT_RXDID_PRIO_M;
+
+	wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
+}
+
+/**
+ * ice_vsi_cfg_rxqs - Configure the VSI for Rx
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Rx VSI for operation.
+ */
+int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
+{
+	u16 i;
+
+	if (vsi->type == ICE_VSI_VF)
+		goto setup_rings;
+
+	ice_vsi_cfg_frame_size(vsi);
+setup_rings:
+	/* set up individual rings */
+	for (i = 0; i < vsi->num_rxq; i++) {
+		int err;
+
+		err = ice_setup_rx_ctx(vsi->rx_rings[i]);
+		if (err) {
+			dev_err(ice_pf_to_dev(vsi->back), "ice_setup_rx_ctx failed for RxQ %d, err %d\n",
+				i, err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vsi_cfg_txqs - Configure the VSI for Tx
+ * @vsi: the VSI being configured
+ * @rings: Tx ring array to be configured
+ * @count: number of Tx ring array elements
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Tx VSI for operation.
+ */
+static int
+ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, u16 count)
+{
+	struct ice_aqc_add_tx_qgrp *qg_buf;
+	u16 q_idx = 0;
+	int err = 0;
+
+	qg_buf = kzalloc(struct_size(qg_buf, txqs, 1), GFP_KERNEL);
+	if (!qg_buf)
+		return -ENOMEM;
+
+	qg_buf->num_txqs = 1;
+
+	for (q_idx = 0; q_idx < count; q_idx++) {
+		err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf);
+		if (err)
+			goto err_cfg_txqs;
+	}
+
+err_cfg_txqs:
+	kfree(qg_buf);
+	return err;
+}
+
+/**
+ * ice_vsi_cfg_lan_txqs - Configure the VSI for Tx
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Tx VSI for operation.
+ */
+int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
+{
+	return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, vsi->num_txq);
+}
+
+/**
+ * ice_vsi_cfg_xdp_txqs - Configure Tx queues dedicated for XDP in given VSI
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Tx queues dedicated for XDP in given VSI for operation.
+ */
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
+{
+	int ret;
+	int i;
+
+	ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings, vsi->num_xdp_txq);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < vsi->num_xdp_txq; i++)
+		vsi->xdp_rings[i]->xsk_pool = ice_xsk_pool(vsi->xdp_rings[i]);
+
+	return ret;
+}
+
+/**
+ * ice_intrl_usec_to_reg - convert interrupt rate limit to register value
+ * @intrl: interrupt rate limit in usecs
+ * @gran: interrupt rate limit granularity in usecs
+ *
+ * This function converts a decimal interrupt rate limit in usecs to the format
+ * expected by firmware.
+ */
+u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
+{
+	u32 val = intrl / gran;
+
+	if (val)
+		return val | GLINT_RATE_INTRL_ENA_M;
+	return 0;
+}
+
+/**
+ * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW
+ * @vsi: the VSI being configured
+ *
+ * This configures MSIX mode interrupts for the PF VSI, and should not be used
+ * for the VF VSI.
+ */
+void ice_vsi_cfg_msix(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u16 txq = 0, rxq = 0;
+	int i, q;
+
+	for (i = 0; i < vsi->num_q_vectors; i++) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[i];
+		u16 reg_idx = q_vector->reg_idx;
+
+		ice_cfg_itr(hw, q_vector);
+
+		wr32(hw, GLINT_RATE(reg_idx),
+		     ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran));
+
+		/* Both Transmit Queue Interrupt Cause Control register
+		 * and Receive Queue Interrupt Cause control register
+		 * expects MSIX_INDX field to be the vector index
+		 * within the function space and not the absolute
+		 * vector index across PF or across device.
+		 * For SR-IOV VF VSIs queue vector index always starts
+		 * with 1 since first vector index(0) is used for OICR
+		 * in VF space. Since VMDq and other PF VSIs are within
+		 * the PF function space, use the vector index that is
+		 * tracked for this PF.
+		 */
+		for (q = 0; q < q_vector->num_ring_tx; q++) {
+			ice_cfg_txq_interrupt(vsi, txq, reg_idx,
+					      q_vector->tx.itr_idx);
+			txq++;
+		}
+
+		for (q = 0; q < q_vector->num_ring_rx; q++) {
+			ice_cfg_rxq_interrupt(vsi, rxq, reg_idx,
+					      q_vector->rx.itr_idx);
+			rxq++;
+		}
+	}
+}
+
+/**
+ * ice_vsi_manage_vlan_insertion - Manage VLAN insertion for the VSI for Tx
+ * @vsi: the VSI being changed
+ */
+int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx *ctxt;
+	enum ice_status status;
+	int ret = 0;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	/* Here we are configuring the VSI to let the driver add VLAN tags by
+	 * setting vlan_flags to ICE_AQ_VSI_VLAN_MODE_ALL. The actual VLAN tag
+	 * insertion happens in the Tx hot path, in ice_tx_map.
+	 */
+	ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL;
+
+	/* Preserve existing VLAN strip setting */
+	ctxt->info.vlan_flags |= (vsi->info.vlan_flags &
+				  ICE_AQ_VSI_VLAN_EMOD_M);
+
+	ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
+
+	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (status) {
+		dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN insert failed, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		ret = -EIO;
+		goto out;
+	}
+
+	vsi->info.vlan_flags = ctxt->info.vlan_flags;
+out:
+	kfree(ctxt);
+	return ret;
+}
+
+/**
+ * ice_vsi_manage_vlan_stripping - Manage VLAN stripping for the VSI for Rx
+ * @vsi: the VSI being changed
+ * @ena: boolean value indicating if this is a enable or disable request
+ */
+int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx *ctxt;
+	enum ice_status status;
+	int ret = 0;
+
+	/* do not allow modifying VLAN stripping when a port VLAN is configured
+	 * on this VSI
+	 */
+	if (vsi->info.pvid)
+		return 0;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	/* Here we are configuring what the VSI should do with the VLAN tag in
+	 * the Rx packet. We can either leave the tag in the packet or put it in
+	 * the Rx descriptor.
+	 */
+	if (ena)
+		/* Strip VLAN tag from Rx packet and put it in the desc */
+		ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_STR_BOTH;
+	else
+		/* Disable stripping. Leave tag in packet */
+		ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_NOTHING;
+
+	/* Allow all packets untagged/tagged */
+	ctxt->info.vlan_flags |= ICE_AQ_VSI_VLAN_MODE_ALL;
+
+	ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
+
+	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (status) {
+		dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN strip failed, ena = %d err %s aq_err %s\n",
+			ena, ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		ret = -EIO;
+		goto out;
+	}
+
+	vsi->info.vlan_flags = ctxt->info.vlan_flags;
+out:
+	kfree(ctxt);
+	return ret;
+}
+
+/**
+ * ice_vsi_start_all_rx_rings - start/enable all of a VSI's Rx rings
+ * @vsi: the VSI whose rings are to be enabled
+ *
+ * Returns 0 on success and a negative value on error
+ */
+int ice_vsi_start_all_rx_rings(struct ice_vsi *vsi)
+{
+	return ice_vsi_ctrl_all_rx_rings(vsi, true);
+}
+
+/**
+ * ice_vsi_stop_all_rx_rings - stop/disable all of a VSI's Rx rings
+ * @vsi: the VSI whose rings are to be disabled
+ *
+ * Returns 0 on success and a negative value on error
+ */
+int ice_vsi_stop_all_rx_rings(struct ice_vsi *vsi)
+{
+	return ice_vsi_ctrl_all_rx_rings(vsi, false);
+}
+
+/**
+ * ice_vsi_stop_tx_rings - Disable Tx rings
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ * @rings: Tx ring array to be stopped
+ * @count: number of Tx ring array elements
+ */
+static int
+ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+		      u16 rel_vmvf_num, struct ice_ring **rings, u16 count)
+{
+	u16 q_idx;
+
+	if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
+		return -EINVAL;
+
+	for (q_idx = 0; q_idx < count; q_idx++) {
+		struct ice_txq_meta txq_meta = { };
+		int status;
+
+		if (!rings || !rings[q_idx])
+			return -EINVAL;
+
+		ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta);
+		status = ice_vsi_stop_tx_ring(vsi, rst_src, rel_vmvf_num,
+					      rings[q_idx], &txq_meta);
+
+		if (status)
+			return status;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vsi_stop_lan_tx_rings - Disable LAN Tx rings
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ */
+int
+ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+			  u16 rel_vmvf_num)
+{
+	return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings, vsi->num_txq);
+}
+
+/**
+ * ice_vsi_stop_xdp_tx_rings - Disable XDP Tx rings
+ * @vsi: the VSI being configured
+ */
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi)
+{
+	return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings, vsi->num_xdp_txq);
+}
+
+/**
+ * ice_vsi_is_vlan_pruning_ena - check if VLAN pruning is enabled or not
+ * @vsi: VSI to check whether or not VLAN pruning is enabled.
+ *
+ * returns true if Rx VLAN pruning is enabled and false otherwise.
+ */
+bool ice_vsi_is_vlan_pruning_ena(struct ice_vsi *vsi)
+{
+	if (!vsi)
+		return false;
+
+	return (vsi->info.sw_flags2 & ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA);
+}
+
+/**
+ * ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI
+ * @vsi: VSI to enable or disable VLAN pruning on
+ * @ena: set to true to enable VLAN pruning and false to disable it
+ * @vlan_promisc: enable valid security flags if not in VLAN promiscuous mode
+ *
+ * returns 0 if VSI is updated, negative otherwise
+ */
+int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc)
+{
+	struct ice_vsi_ctx *ctxt;
+	struct ice_pf *pf;
+	int status;
+
+	if (!vsi)
+		return -EINVAL;
+
+	/* Don't enable VLAN pruning if the netdev is currently in promiscuous
+	 * mode. VLAN pruning will be enabled when the interface exits
+	 * promiscuous mode if any VLAN filters are active.
+	 */
+	if (vsi->netdev && vsi->netdev->flags & IFF_PROMISC && ena)
+		return 0;
+
+	pf = vsi->back;
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	ctxt->info = vsi->info;
+
+	if (ena)
+		ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+	else
+		ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+
+	if (!vlan_promisc)
+		ctxt->info.valid_sections =
+			cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);
+
+	status = ice_update_vsi(&pf->hw, vsi->idx, ctxt, NULL);
+	if (status) {
+		netdev_err(vsi->netdev, "%sabling VLAN pruning on VSI handle: %d, VSI HW ID: %d failed, err = %s, aq_err = %s\n",
+			   ena ? "En" : "Dis", vsi->idx, vsi->vsi_num,
+			   ice_stat_str(status),
+			   ice_aq_str(pf->hw.adminq.sq_last_status));
+		goto err_out;
+	}
+
+	vsi->info.sw_flags2 = ctxt->info.sw_flags2;
+
+	kfree(ctxt);
+	return 0;
+
+err_out:
+	kfree(ctxt);
+	return -EIO;
+}
+
+static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi)
+{
+	struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg;
+
+	vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(cfg);
+	vsi->tc_cfg.numtc = ice_dcb_get_num_tc(cfg);
+}
+
+/**
+ * ice_vsi_set_q_vectors_reg_idx - set the HW register index for all q_vectors
+ * @vsi: VSI to set the q_vectors register index on
+ */
+static int
+ice_vsi_set_q_vectors_reg_idx(struct ice_vsi *vsi)
+{
+	u16 i;
+
+	if (!vsi || !vsi->q_vectors)
+		return -EINVAL;
+
+	ice_for_each_q_vector(vsi, i) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+		if (!q_vector) {
+			dev_err(ice_pf_to_dev(vsi->back), "Failed to set reg_idx on q_vector %d VSI %d\n",
+				i, vsi->vsi_num);
+			goto clear_reg_idx;
+		}
+
+		if (vsi->type == ICE_VSI_VF) {
+			struct ice_vf *vf = &vsi->back->vf[vsi->vf_id];
+
+			q_vector->reg_idx = ice_calc_vf_reg_idx(vf, q_vector);
+		} else {
+			q_vector->reg_idx =
+				q_vector->v_idx + vsi->base_vector;
+		}
+	}
+
+	return 0;
+
+clear_reg_idx:
+	ice_for_each_q_vector(vsi, i) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+		if (q_vector)
+			q_vector->reg_idx = 0;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * ice_cfg_sw_lldp - Config switch rules for LLDP packet handling
+ * @vsi: the VSI being configured
+ * @tx: bool to determine Tx or Rx rule
+ * @create: bool to determine create or remove Rule
+ */
+void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
+{
+	enum ice_status (*eth_fltr)(struct ice_vsi *v, u16 type, u16 flag,
+				    enum ice_sw_fwd_act_type act);
+	struct ice_pf *pf = vsi->back;
+	enum ice_status status;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+	eth_fltr = create ? ice_fltr_add_eth : ice_fltr_remove_eth;
+
+	if (tx)
+		status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_TX,
+				  ICE_DROP_PACKET);
+	else
+		status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_RX, ICE_FWD_TO_VSI);
+
+	if (status)
+		dev_err(dev, "Fail %s %s LLDP rule on VSI %i error: %s\n",
+			create ? "adding" : "removing", tx ? "TX" : "RX",
+			vsi->vsi_num, ice_stat_str(status));
+}
+
+/**
+ * ice_vsi_setup - Set up a VSI by a given type
+ * @pf: board private structure
+ * @pi: pointer to the port_info instance
+ * @vsi_type: VSI type
+ * @vf_id: defines VF ID to which this VSI connects. This field is meant to be
+ *         used only for ICE_VSI_VF VSI type. For other VSI types, should
+ *         fill-in ICE_INVAL_VFID as input.
+ *
+ * This allocates the sw VSI structure and its queue resources.
+ *
+ * Returns pointer to the successfully allocated and configured VSI sw struct on
+ * success, NULL on failure.
+ */
+struct ice_vsi *
+ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
+	      enum ice_vsi_type vsi_type, u16 vf_id)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	struct device *dev = ice_pf_to_dev(pf);
+	enum ice_status status;
+	struct ice_vsi *vsi;
+	int ret, i;
+
+	if (vsi_type == ICE_VSI_VF)
+		vsi = ice_vsi_alloc(pf, vsi_type, vf_id);
+	else
+		vsi = ice_vsi_alloc(pf, vsi_type, ICE_INVAL_VFID);
+
+	if (!vsi) {
+		dev_err(dev, "could not allocate VSI\n");
+		return NULL;
+	}
+
+	vsi->port_info = pi;
+	vsi->vsw = pf->first_sw;
+	if (vsi->type == ICE_VSI_PF)
+		vsi->ethtype = ETH_P_PAUSE;
+
+	if (vsi->type == ICE_VSI_VF)
+		vsi->vf_id = vf_id;
+
+	ice_alloc_fd_res(vsi);
+
+	if (ice_vsi_get_qs(vsi)) {
+		dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n",
+			vsi->idx);
+		goto unroll_vsi_alloc;
+	}
+
+	/* set RSS capabilities */
+	ice_vsi_set_rss_params(vsi);
+
+	/* set TC configuration */
+	ice_vsi_set_tc_cfg(vsi);
+
+	/* create the VSI */
+	ret = ice_vsi_init(vsi, true);
+	if (ret)
+		goto unroll_get_qs;
+
+	switch (vsi->type) {
+	case ICE_VSI_CTRL:
+	case ICE_VSI_PF:
+		ret = ice_vsi_alloc_q_vectors(vsi);
+		if (ret)
+			goto unroll_vsi_init;
+
+		ret = ice_vsi_setup_vector_base(vsi);
+		if (ret)
+			goto unroll_alloc_q_vector;
+
+		ret = ice_vsi_set_q_vectors_reg_idx(vsi);
+		if (ret)
+			goto unroll_vector_base;
+
+		ret = ice_vsi_alloc_rings(vsi);
+		if (ret)
+			goto unroll_vector_base;
+
+		/* Always add VLAN ID 0 switch rule by default. This is needed
+		 * in order to allow all untagged and 0 tagged priority traffic
+		 * if Rx VLAN pruning is enabled. Also there are cases where we
+		 * don't get the call to add VLAN 0 via ice_vlan_rx_add_vid()
+		 * so this handles those cases (i.e. adding the PF to a bridge
+		 * without the 8021q module loaded).
+		 */
+		ret = ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI);
+		if (ret)
+			goto unroll_clear_rings;
+
+		ice_vsi_map_rings_to_vectors(vsi);
+
+		/* ICE_VSI_CTRL does not need RSS so skip RSS processing */
+		if (vsi->type != ICE_VSI_CTRL)
+			/* Do not exit if configuring RSS had an issue, at
+			 * least receive traffic on first queue. Hence no
+			 * need to capture return value
+			 */
+			if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+				ice_vsi_cfg_rss_lut_key(vsi);
+				ice_vsi_set_rss_flow_fld(vsi);
+			}
+		ice_init_arfs(vsi);
+		break;
+	case ICE_VSI_VF:
+		/* VF driver will take care of creating netdev for this type and
+		 * map queues to vectors through Virtchnl, PF driver only
+		 * creates a VSI and corresponding structures for bookkeeping
+		 * purpose
+		 */
+		ret = ice_vsi_alloc_q_vectors(vsi);
+		if (ret)
+			goto unroll_vsi_init;
+
+		ret = ice_vsi_alloc_rings(vsi);
+		if (ret)
+			goto unroll_alloc_q_vector;
+
+		ret = ice_vsi_set_q_vectors_reg_idx(vsi);
+		if (ret)
+			goto unroll_vector_base;
+
+		/* Do not exit if configuring RSS had an issue, at least
+		 * receive traffic on first queue. Hence no need to capture
+		 * return value
+		 */
+		if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+			ice_vsi_cfg_rss_lut_key(vsi);
+			ice_vsi_set_vf_rss_flow_fld(vsi);
+		}
+		break;
+	case ICE_VSI_LB:
+		ret = ice_vsi_alloc_rings(vsi);
+		if (ret)
+			goto unroll_vsi_init;
+		break;
+	default:
+		/* clean up the resources and exit */
+		goto unroll_vsi_init;
+	}
+
+	/* configure VSI nodes based on number of queues and TC's */
+	for (i = 0; i < vsi->tc_cfg.numtc; i++)
+		max_txqs[i] = vsi->alloc_txq;
+
+	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+				 max_txqs);
+	if (status) {
+		dev_err(dev, "VSI %d failed lan queue config, error %s\n",
+			vsi->vsi_num, ice_stat_str(status));
+		goto unroll_clear_rings;
+	}
+
+	/* Add switch rule to drop all Tx Flow Control Frames, of look up
+	 * type ETHERTYPE from VSIs, and restrict malicious VF from sending
+	 * out PAUSE or PFC frames. If enabled, FW can still send FC frames.
+	 * The rule is added once for PF VSI in order to create appropriate
+	 * recipe, since VSI/VSI list is ignored with drop action...
+	 * Also add rules to handle LLDP Tx packets.  Tx LLDP packets need to
+	 * be dropped so that VFs cannot send LLDP packets to reconfig DCB
+	 * settings in the HW.
+	 */
+	if (!ice_is_safe_mode(pf))
+		if (vsi->type == ICE_VSI_PF) {
+			ice_fltr_add_eth(vsi, ETH_P_PAUSE, ICE_FLTR_TX,
+					 ICE_DROP_PACKET);
+			ice_cfg_sw_lldp(vsi, true, true);
+		}
+
+	return vsi;
+
+unroll_clear_rings:
+	ice_vsi_clear_rings(vsi);
+unroll_vector_base:
+	/* reclaim SW interrupts back to the common pool */
+	ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
+	pf->num_avail_sw_msix += vsi->num_q_vectors;
+unroll_alloc_q_vector:
+	ice_vsi_free_q_vectors(vsi);
+unroll_vsi_init:
+	ice_vsi_delete(vsi);
+unroll_get_qs:
+	ice_vsi_put_qs(vsi);
+unroll_vsi_alloc:
+	ice_vsi_clear(vsi);
+
+	return NULL;
+}
+
+/**
+ * ice_vsi_release_msix - Clear the queue to Interrupt mapping in HW
+ * @vsi: the VSI being cleaned up
+ */
+static void ice_vsi_release_msix(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 txq = 0;
+	u32 rxq = 0;
+	int i, q;
+
+	for (i = 0; i < vsi->num_q_vectors; i++) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[i];
+		u16 reg_idx = q_vector->reg_idx;
+
+		wr32(hw, GLINT_ITR(ICE_IDX_ITR0, reg_idx), 0);
+		wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0);
+		for (q = 0; q < q_vector->num_ring_tx; q++) {
+			wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
+			if (ice_is_xdp_ena_vsi(vsi)) {
+				u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+				wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]), 0);
+			}
+			txq++;
+		}
+
+		for (q = 0; q < q_vector->num_ring_rx; q++) {
+			wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), 0);
+			rxq++;
+		}
+	}
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_vsi_free_irq - Free the IRQ association with the OS
+ * @vsi: the VSI being configured
+ */
+void ice_vsi_free_irq(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int base = vsi->base_vector;
+	int i;
+
+	if (!vsi->q_vectors || !vsi->irqs_ready)
+		return;
+
+	ice_vsi_release_msix(vsi);
+	if (vsi->type == ICE_VSI_VF)
+		return;
+
+	vsi->irqs_ready = false;
+	ice_for_each_q_vector(vsi, i) {
+		u16 vector = i + base;
+		int irq_num;
+
+		irq_num = pf->msix_entries[vector].vector;
+
+		/* free only the irqs that were actually requested */
+		if (!vsi->q_vectors[i] ||
+		    !(vsi->q_vectors[i]->num_ring_tx ||
+		      vsi->q_vectors[i]->num_ring_rx))
+			continue;
+
+		/* clear the affinity notifier in the IRQ descriptor */
+		irq_set_affinity_notifier(irq_num, NULL);
+
+		/* clear the affinity_mask in the IRQ descriptor */
+		irq_set_affinity_hint(irq_num, NULL);
+		synchronize_irq(irq_num);
+		devm_free_irq(ice_pf_to_dev(pf), irq_num, vsi->q_vectors[i]);
+	}
+}
+
+/**
+ * ice_vsi_free_tx_rings - Free Tx resources for VSI queues
+ * @vsi: the VSI having resources freed
+ */
+void ice_vsi_free_tx_rings(struct ice_vsi *vsi)
+{
+	int i;
+
+	if (!vsi->tx_rings)
+		return;
+
+	ice_for_each_txq(vsi, i)
+		if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
+			ice_free_tx_ring(vsi->tx_rings[i]);
+}
+
+/**
+ * ice_vsi_free_rx_rings - Free Rx resources for VSI queues
+ * @vsi: the VSI having resources freed
+ */
+void ice_vsi_free_rx_rings(struct ice_vsi *vsi)
+{
+	int i;
+
+	if (!vsi->rx_rings)
+		return;
+
+	ice_for_each_rxq(vsi, i)
+		if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc)
+			ice_free_rx_ring(vsi->rx_rings[i]);
+}
+
+/**
+ * ice_vsi_close - Shut down a VSI
+ * @vsi: the VSI being shut down
+ */
+void ice_vsi_close(struct ice_vsi *vsi)
+{
+	if (!test_and_set_bit(__ICE_DOWN, vsi->state))
+		ice_down(vsi);
+
+	ice_vsi_free_irq(vsi);
+	ice_vsi_free_tx_rings(vsi);
+	ice_vsi_free_rx_rings(vsi);
+}
+
+/**
+ * ice_ena_vsi - resume a VSI
+ * @vsi: the VSI being resume
+ * @locked: is the rtnl_lock already held
+ */
+int ice_ena_vsi(struct ice_vsi *vsi, bool locked)
+{
+	int err = 0;
+
+	if (!test_bit(__ICE_NEEDS_RESTART, vsi->state))
+		return 0;
+
+	clear_bit(__ICE_NEEDS_RESTART, vsi->state);
+
+	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
+		if (netif_running(vsi->netdev)) {
+			if (!locked)
+				rtnl_lock();
+
+			err = ice_open_internal(vsi->netdev);
+
+			if (!locked)
+				rtnl_unlock();
+		}
+	} else if (vsi->type == ICE_VSI_CTRL) {
+		err = ice_vsi_open_ctrl(vsi);
+	}
+
+	return err;
+}
+
+/**
+ * ice_dis_vsi - pause a VSI
+ * @vsi: the VSI being paused
+ * @locked: is the rtnl_lock already held
+ */
+void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
+{
+	if (test_bit(__ICE_DOWN, vsi->state))
+		return;
+
+	set_bit(__ICE_NEEDS_RESTART, vsi->state);
+
+	if (vsi->type == ICE_VSI_PF && vsi->netdev) {
+		if (netif_running(vsi->netdev)) {
+			if (!locked)
+				rtnl_lock();
+
+			ice_vsi_close(vsi);
+
+			if (!locked)
+				rtnl_unlock();
+		} else {
+			ice_vsi_close(vsi);
+		}
+	} else if (vsi->type == ICE_VSI_CTRL) {
+		ice_vsi_close(vsi);
+	}
+}
+
+/**
+ * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI
+ * @vsi: the VSI being un-configured
+ */
+void ice_vsi_dis_irq(struct ice_vsi *vsi)
+{
+	int base = vsi->base_vector;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+	int i;
+
+	/* disable interrupt causation from each queue */
+	if (vsi->tx_rings) {
+		ice_for_each_txq(vsi, i) {
+			if (vsi->tx_rings[i]) {
+				u16 reg;
+
+				reg = vsi->tx_rings[i]->reg_idx;
+				val = rd32(hw, QINT_TQCTL(reg));
+				val &= ~QINT_TQCTL_CAUSE_ENA_M;
+				wr32(hw, QINT_TQCTL(reg), val);
+			}
+		}
+	}
+
+	if (vsi->rx_rings) {
+		ice_for_each_rxq(vsi, i) {
+			if (vsi->rx_rings[i]) {
+				u16 reg;
+
+				reg = vsi->rx_rings[i]->reg_idx;
+				val = rd32(hw, QINT_RQCTL(reg));
+				val &= ~QINT_RQCTL_CAUSE_ENA_M;
+				wr32(hw, QINT_RQCTL(reg), val);
+			}
+		}
+	}
+
+	/* disable each interrupt */
+	ice_for_each_q_vector(vsi, i) {
+		if (!vsi->q_vectors[i])
+			continue;
+		wr32(hw, GLINT_DYN_CTL(vsi->q_vectors[i]->reg_idx), 0);
+	}
+
+	ice_flush(hw);
+
+	/* don't call synchronize_irq() for VF's from the host */
+	if (vsi->type == ICE_VSI_VF)
+		return;
+
+	ice_for_each_q_vector(vsi, i)
+		synchronize_irq(pf->msix_entries[i + base].vector);
+}
+
+/**
+ * ice_napi_del - Remove NAPI handler for the VSI
+ * @vsi: VSI for which NAPI handler is to be removed
+ */
+void ice_napi_del(struct ice_vsi *vsi)
+{
+	int v_idx;
+
+	if (!vsi->netdev)
+		return;
+
+	ice_for_each_q_vector(vsi, v_idx)
+		netif_napi_del(&vsi->q_vectors[v_idx]->napi);
+}
+
+/**
+ * ice_vsi_release - Delete a VSI and free its resources
+ * @vsi: the VSI being removed
+ *
+ * Returns 0 on success or < 0 on error
+ */
+int ice_vsi_release(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf;
+
+	if (!vsi->back)
+		return -ENODEV;
+	pf = vsi->back;
+
+	/* do not unregister while driver is in the reset recovery pending
+	 * state. Since reset/rebuild happens through PF service task workqueue,
+	 * it's not a good idea to unregister netdev that is associated to the
+	 * PF that is running the work queue items currently. This is done to
+	 * avoid check_flush_dependency() warning on this wq
+	 */
+	if (vsi->netdev && !ice_is_reset_in_progress(pf->state)) {
+		unregister_netdev(vsi->netdev);
+		ice_devlink_destroy_port(vsi);
+	}
+
+	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+		ice_rss_clean(vsi);
+
+	/* Disable VSI and free resources */
+	if (vsi->type != ICE_VSI_LB)
+		ice_vsi_dis_irq(vsi);
+	ice_vsi_close(vsi);
+
+	/* SR-IOV determines needed MSIX resources all at once instead of per
+	 * VSI since when VFs are spawned we know how many VFs there are and how
+	 * many interrupts each VF needs. SR-IOV MSIX resources are also
+	 * cleared in the same manner.
+	 */
+	if (vsi->type != ICE_VSI_VF) {
+		/* reclaim SW interrupts back to the common pool */
+		ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
+		pf->num_avail_sw_msix += vsi->num_q_vectors;
+	}
+
+	if (!ice_is_safe_mode(pf)) {
+		if (vsi->type == ICE_VSI_PF) {
+			ice_fltr_remove_eth(vsi, ETH_P_PAUSE, ICE_FLTR_TX,
+					    ICE_DROP_PACKET);
+			ice_cfg_sw_lldp(vsi, true, false);
+			/* The Rx rule will only exist to remove if the LLDP FW
+			 * engine is currently stopped
+			 */
+			if (!test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
+				ice_cfg_sw_lldp(vsi, false, false);
+		}
+	}
+
+	if (ice_is_vsi_dflt_vsi(pf->first_sw, vsi))
+		ice_clear_dflt_vsi(pf->first_sw);
+	ice_fltr_remove_all(vsi);
+	ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
+	ice_vsi_delete(vsi);
+	ice_vsi_free_q_vectors(vsi);
+
+	/* make sure unregister_netdev() was called by checking __ICE_DOWN */
+	if (vsi->netdev && test_bit(__ICE_DOWN, vsi->state)) {
+		free_netdev(vsi->netdev);
+		vsi->netdev = NULL;
+	}
+
+	ice_vsi_clear_rings(vsi);
+
+	ice_vsi_put_qs(vsi);
+
+	/* retain SW VSI data structure since it is needed to unregister and
+	 * free VSI netdev when PF is not in reset recovery pending state,\
+	 * for ex: during rmmod.
+	 */
+	if (!ice_is_reset_in_progress(pf->state))
+		ice_vsi_clear(vsi);
+
+	return 0;
+}
+
+/**
+ * ice_vsi_rebuild_update_coalesce_intrl - set interrupt rate limit for a q_vector
+ * @q_vector: pointer to q_vector which is being updated
+ * @stored_intrl_setting: original INTRL setting
+ *
+ * Set coalesce param in q_vector and update these parameters in HW.
+ */
+static void
+ice_vsi_rebuild_update_coalesce_intrl(struct ice_q_vector *q_vector,
+				      u16 stored_intrl_setting)
+{
+	struct ice_hw *hw = &q_vector->vsi->back->hw;
+
+	q_vector->intrl = stored_intrl_setting;
+	wr32(hw, GLINT_RATE(q_vector->reg_idx),
+	     ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran));
+}
+
+/**
+ * ice_vsi_rebuild_update_coalesce_itr - set coalesce for a q_vector
+ * @q_vector: pointer to q_vector which is being updated
+ * @rc: pointer to ring container
+ * @stored_itr_setting: original ITR setting
+ *
+ * Set coalesce param in q_vector and update these parameters in HW.
+ */
+static void
+ice_vsi_rebuild_update_coalesce_itr(struct ice_q_vector *q_vector,
+				    struct ice_ring_container *rc,
+				    u16 stored_itr_setting)
+{
+	struct ice_hw *hw = &q_vector->vsi->back->hw;
+
+	rc->itr_setting = stored_itr_setting;
+
+	/* dynamic ITR values will be updated during Tx/Rx */
+	if (!ITR_IS_DYNAMIC(rc->itr_setting))
+		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+		     ITR_REG_ALIGN(rc->itr_setting) >> ICE_ITR_GRAN_S);
+}
+
+/**
+ * ice_vsi_rebuild_get_coalesce - get coalesce from all q_vectors
+ * @vsi: VSI connected with q_vectors
+ * @coalesce: array of struct with stored coalesce
+ *
+ * Returns array size.
+ */
+static int
+ice_vsi_rebuild_get_coalesce(struct ice_vsi *vsi,
+			     struct ice_coalesce_stored *coalesce)
+{
+	int i;
+
+	ice_for_each_q_vector(vsi, i) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+		coalesce[i].itr_tx = q_vector->tx.itr_setting;
+		coalesce[i].itr_rx = q_vector->rx.itr_setting;
+		coalesce[i].intrl = q_vector->intrl;
+
+		if (i < vsi->num_txq)
+			coalesce[i].tx_valid = true;
+		if (i < vsi->num_rxq)
+			coalesce[i].rx_valid = true;
+	}
+
+	return vsi->num_q_vectors;
+}
+
+/**
+ * ice_vsi_rebuild_set_coalesce - set coalesce from earlier saved arrays
+ * @vsi: VSI connected with q_vectors
+ * @coalesce: pointer to array of struct with stored coalesce
+ * @size: size of coalesce array
+ *
+ * Before this function, ice_vsi_rebuild_get_coalesce should be called to save
+ * ITR params in arrays. If size is 0 or coalesce wasn't stored set coalesce
+ * to default value.
+ */
+static void
+ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
+			     struct ice_coalesce_stored *coalesce, int size)
+{
+	int i;
+
+	if ((size && !coalesce) || !vsi)
+		return;
+
+	/* There are a couple of cases that have to be handled here:
+	 *   1. The case where the number of queue vectors stays the same, but
+	 *      the number of Tx or Rx rings changes (the first for loop)
+	 *   2. The case where the number of queue vectors increased (the
+	 *      second for loop)
+	 */
+	for (i = 0; i < size && i < vsi->num_q_vectors; i++) {
+		/* There are 2 cases to handle here and they are the same for
+		 * both Tx and Rx:
+		 *   if the entry was valid previously (coalesce[i].[tr]x_valid
+		 *   and the loop variable is less than the number of rings
+		 *   allocated, then write the previous values
+		 *
+		 *   if the entry was not valid previously, but the number of
+		 *   rings is less than are allocated (this means the number of
+		 *   rings increased from previously), then write out the
+		 *   values in the first element
+		 */
+		if (i < vsi->alloc_rxq && coalesce[i].rx_valid)
+			ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
+							    &vsi->q_vectors[i]->rx,
+							    coalesce[i].itr_rx);
+		else if (i < vsi->alloc_rxq)
+			ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
+							    &vsi->q_vectors[i]->rx,
+							    coalesce[0].itr_rx);
+
+		if (i < vsi->alloc_txq && coalesce[i].tx_valid)
+			ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
+							    &vsi->q_vectors[i]->tx,
+							    coalesce[i].itr_tx);
+		else if (i < vsi->alloc_txq)
+			ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
+							    &vsi->q_vectors[i]->tx,
+							    coalesce[0].itr_tx);
+
+		ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i],
+						      coalesce[i].intrl);
+	}
+
+	/* the number of queue vectors increased so write whatever is in
+	 * the first element
+	 */
+	for (; i < vsi->num_q_vectors; i++) {
+		ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
+						    &vsi->q_vectors[i]->tx,
+						    coalesce[0].itr_tx);
+		ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
+						    &vsi->q_vectors[i]->rx,
+						    coalesce[0].itr_rx);
+		ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i],
+						      coalesce[0].intrl);
+	}
+}
+
+/**
+ * ice_vsi_rebuild - Rebuild VSI after reset
+ * @vsi: VSI to be rebuild
+ * @init_vsi: is this an initialization or a reconfigure of the VSI
+ *
+ * Returns 0 on success and negative value on failure
+ */
+int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	struct ice_coalesce_stored *coalesce;
+	int prev_num_q_vectors = 0;
+	struct ice_vf *vf = NULL;
+	enum ice_status status;
+	struct ice_pf *pf;
+	int ret, i;
+
+	if (!vsi)
+		return -EINVAL;
+
+	pf = vsi->back;
+	if (vsi->type == ICE_VSI_VF)
+		vf = &pf->vf[vsi->vf_id];
+
+	coalesce = kcalloc(vsi->num_q_vectors,
+			   sizeof(struct ice_coalesce_stored), GFP_KERNEL);
+	if (!coalesce)
+		return -ENOMEM;
+
+	prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
+
+	ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
+	ice_vsi_free_q_vectors(vsi);
+
+	/* SR-IOV determines needed MSIX resources all at once instead of per
+	 * VSI since when VFs are spawned we know how many VFs there are and how
+	 * many interrupts each VF needs. SR-IOV MSIX resources are also
+	 * cleared in the same manner.
+	 */
+	if (vsi->type != ICE_VSI_VF) {
+		/* reclaim SW interrupts back to the common pool */
+		ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
+		pf->num_avail_sw_msix += vsi->num_q_vectors;
+		vsi->base_vector = 0;
+	}
+
+	if (ice_is_xdp_ena_vsi(vsi))
+		/* return value check can be skipped here, it always returns
+		 * 0 if reset is in progress
+		 */
+		ice_destroy_xdp_rings(vsi);
+	ice_vsi_put_qs(vsi);
+	ice_vsi_clear_rings(vsi);
+	ice_vsi_free_arrays(vsi);
+	if (vsi->type == ICE_VSI_VF)
+		ice_vsi_set_num_qs(vsi, vf->vf_id);
+	else
+		ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID);
+
+	ret = ice_vsi_alloc_arrays(vsi);
+	if (ret < 0)
+		goto err_vsi;
+
+	ice_vsi_get_qs(vsi);
+
+	ice_alloc_fd_res(vsi);
+	ice_vsi_set_tc_cfg(vsi);
+
+	/* Initialize VSI struct elements and create VSI in FW */
+	ret = ice_vsi_init(vsi, init_vsi);
+	if (ret < 0)
+		goto err_vsi;
+
+	switch (vsi->type) {
+	case ICE_VSI_CTRL:
+	case ICE_VSI_PF:
+		ret = ice_vsi_alloc_q_vectors(vsi);
+		if (ret)
+			goto err_rings;
+
+		ret = ice_vsi_setup_vector_base(vsi);
+		if (ret)
+			goto err_vectors;
+
+		ret = ice_vsi_set_q_vectors_reg_idx(vsi);
+		if (ret)
+			goto err_vectors;
+
+		ret = ice_vsi_alloc_rings(vsi);
+		if (ret)
+			goto err_vectors;
+
+		ice_vsi_map_rings_to_vectors(vsi);
+		if (ice_is_xdp_ena_vsi(vsi)) {
+			vsi->num_xdp_txq = vsi->alloc_rxq;
+			ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog);
+			if (ret)
+				goto err_vectors;
+		}
+		/* ICE_VSI_CTRL does not need RSS so skip RSS processing */
+		if (vsi->type != ICE_VSI_CTRL)
+			/* Do not exit if configuring RSS had an issue, at
+			 * least receive traffic on first queue. Hence no
+			 * need to capture return value
+			 */
+			if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+				ice_vsi_cfg_rss_lut_key(vsi);
+		break;
+	case ICE_VSI_VF:
+		ret = ice_vsi_alloc_q_vectors(vsi);
+		if (ret)
+			goto err_rings;
+
+		ret = ice_vsi_set_q_vectors_reg_idx(vsi);
+		if (ret)
+			goto err_vectors;
+
+		ret = ice_vsi_alloc_rings(vsi);
+		if (ret)
+			goto err_vectors;
+
+		break;
+	default:
+		break;
+	}
+
+	/* configure VSI nodes based on number of queues and TC's */
+	for (i = 0; i < vsi->tc_cfg.numtc; i++) {
+		max_txqs[i] = vsi->alloc_txq;
+
+		if (ice_is_xdp_ena_vsi(vsi))
+			max_txqs[i] += vsi->num_xdp_txq;
+	}
+
+	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+				 max_txqs);
+	if (status) {
+		dev_err(ice_pf_to_dev(pf), "VSI %d failed lan queue config, error %s\n",
+			vsi->vsi_num, ice_stat_str(status));
+		if (init_vsi) {
+			ret = -EIO;
+			goto err_vectors;
+		} else {
+			return ice_schedule_reset(pf, ICE_RESET_PFR);
+		}
+	}
+	ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors);
+	kfree(coalesce);
+
+	return 0;
+
+err_vectors:
+	ice_vsi_free_q_vectors(vsi);
+err_rings:
+	if (vsi->netdev) {
+		vsi->current_netdev_flags = 0;
+		unregister_netdev(vsi->netdev);
+		free_netdev(vsi->netdev);
+		vsi->netdev = NULL;
+	}
+err_vsi:
+	ice_vsi_clear(vsi);
+	set_bit(__ICE_RESET_FAILED, pf->state);
+	kfree(coalesce);
+	return ret;
+}
+
+/**
+ * ice_is_reset_in_progress - check for a reset in progress
+ * @state: PF state field
+ */
+bool ice_is_reset_in_progress(unsigned long *state)
+{
+	return test_bit(__ICE_RESET_OICR_RECV, state) ||
+	       test_bit(__ICE_PFR_REQ, state) ||
+	       test_bit(__ICE_CORER_REQ, state) ||
+	       test_bit(__ICE_GLOBR_REQ, state);
+}
+
+#ifdef CONFIG_DCB
+/**
+ * ice_vsi_update_q_map - update our copy of the VSI info with new queue map
+ * @vsi: VSI being configured
+ * @ctx: the context buffer returned from AQ VSI update command
+ */
+static void ice_vsi_update_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx)
+{
+	vsi->info.mapping_flags = ctx->info.mapping_flags;
+	memcpy(&vsi->info.q_mapping, &ctx->info.q_mapping,
+	       sizeof(vsi->info.q_mapping));
+	memcpy(&vsi->info.tc_mapping, ctx->info.tc_mapping,
+	       sizeof(vsi->info.tc_mapping));
+}
+
+/**
+ * ice_vsi_cfg_tc - Configure VSI Tx Sched for given TC map
+ * @vsi: VSI to be configured
+ * @ena_tc: TC bitmap
+ *
+ * VSI queues expected to be quiesced before calling this function
+ */
+int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	struct ice_pf *pf = vsi->back;
+	struct ice_vsi_ctx *ctx;
+	enum ice_status status;
+	struct device *dev;
+	int i, ret = 0;
+	u8 num_tc = 0;
+
+	dev = ice_pf_to_dev(pf);
+
+	ice_for_each_traffic_class(i) {
+		/* build bitmap of enabled TCs */
+		if (ena_tc & BIT(i))
+			num_tc++;
+		/* populate max_txqs per TC */
+		max_txqs[i] = vsi->alloc_txq;
+	}
+
+	vsi->tc_cfg.ena_tc = ena_tc;
+	vsi->tc_cfg.numtc = num_tc;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->vf_num = 0;
+	ctx->info = vsi->info;
+
+	ice_vsi_setup_q_map(vsi, ctx);
+
+	/* must to indicate which section of VSI context are being modified */
+	ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
+	status = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL);
+	if (status) {
+		dev_info(dev, "Failed VSI Update\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+				 max_txqs);
+
+	if (status) {
+		dev_err(dev, "VSI %d failed TC config, error %s\n",
+			vsi->vsi_num, ice_stat_str(status));
+		ret = -EIO;
+		goto out;
+	}
+	ice_vsi_update_q_map(vsi, ctx);
+	vsi->info.valid_sections = 0;
+
+	ice_vsi_cfg_netdev_tc(vsi, ena_tc);
+out:
+	kfree(ctx);
+	return ret;
+}
+#endif /* CONFIG_DCB */
+
+/**
+ * ice_update_ring_stats - Update ring statistics
+ * @ring: ring to update
+ * @cont: used to increment per-vector counters
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ *
+ * This function assumes that caller has acquired a u64_stats_sync lock.
+ */
+static void
+ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont,
+		      u64 pkts, u64 bytes)
+{
+	ring->stats.bytes += bytes;
+	ring->stats.pkts += pkts;
+	cont->total_bytes += bytes;
+	cont->total_pkts += pkts;
+}
+
+/**
+ * ice_update_tx_ring_stats - Update Tx ring specific counters
+ * @tx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes)
+{
+	u64_stats_update_begin(&tx_ring->syncp);
+	ice_update_ring_stats(tx_ring, &tx_ring->q_vector->tx, pkts, bytes);
+	u64_stats_update_end(&tx_ring->syncp);
+}
+
+/**
+ * ice_update_rx_ring_stats - Update Rx ring specific counters
+ * @rx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes)
+{
+	u64_stats_update_begin(&rx_ring->syncp);
+	ice_update_ring_stats(rx_ring, &rx_ring->q_vector->rx, pkts, bytes);
+	u64_stats_update_end(&rx_ring->syncp);
+}
+
+/**
+ * ice_status_to_errno - convert from enum ice_status to Linux errno
+ * @err: ice_status value to convert
+ */
+int ice_status_to_errno(enum ice_status err)
+{
+	switch (err) {
+	case ICE_SUCCESS:
+		return 0;
+	case ICE_ERR_DOES_NOT_EXIST:
+		return -ENOENT;
+	case ICE_ERR_OUT_OF_RANGE:
+		return -ENOTTY;
+	case ICE_ERR_PARAM:
+		return -EINVAL;
+	case ICE_ERR_NO_MEMORY:
+		return -ENOMEM;
+	case ICE_ERR_MAX_LIMIT:
+		return -EAGAIN;
+	default:
+		return -EINVAL;
+	}
+}
+
+/**
+ * ice_is_dflt_vsi_in_use - check if the default forwarding VSI is being used
+ * @sw: switch to check if its default forwarding VSI is free
+ *
+ * Return true if the default forwarding VSI is already being used, else returns
+ * false signalling that it's available to use.
+ */
+bool ice_is_dflt_vsi_in_use(struct ice_sw *sw)
+{
+	return (sw->dflt_vsi && sw->dflt_vsi_ena);
+}
+
+/**
+ * ice_is_vsi_dflt_vsi - check if the VSI passed in is the default VSI
+ * @sw: switch for the default forwarding VSI to compare against
+ * @vsi: VSI to compare against default forwarding VSI
+ *
+ * If this VSI passed in is the default forwarding VSI then return true, else
+ * return false
+ */
+bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi)
+{
+	return (sw->dflt_vsi == vsi && sw->dflt_vsi_ena);
+}
+
+/**
+ * ice_set_dflt_vsi - set the default forwarding VSI
+ * @sw: switch used to assign the default forwarding VSI
+ * @vsi: VSI getting set as the default forwarding VSI on the switch
+ *
+ * If the VSI passed in is already the default VSI and it's enabled just return
+ * success.
+ *
+ * If there is already a default VSI on the switch and it's enabled then return
+ * -EEXIST since there can only be one default VSI per switch.
+ *
+ *  Otherwise try to set the VSI passed in as the switch's default VSI and
+ *  return the result.
+ */
+int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi)
+{
+	enum ice_status status;
+	struct device *dev;
+
+	if (!sw || !vsi)
+		return -EINVAL;
+
+	dev = ice_pf_to_dev(vsi->back);
+
+	/* the VSI passed in is already the default VSI */
+	if (ice_is_vsi_dflt_vsi(sw, vsi)) {
+		dev_dbg(dev, "VSI %d passed in is already the default forwarding VSI, nothing to do\n",
+			vsi->vsi_num);
+		return 0;
+	}
+
+	/* another VSI is already the default VSI for this switch */
+	if (ice_is_dflt_vsi_in_use(sw)) {
+		dev_err(dev, "Default forwarding VSI %d already in use, disable it and try again\n",
+			sw->dflt_vsi->vsi_num);
+		return -EEXIST;
+	}
+
+	status = ice_cfg_dflt_vsi(&vsi->back->hw, vsi->idx, true, ICE_FLTR_RX);
+	if (status) {
+		dev_err(dev, "Failed to set VSI %d as the default forwarding VSI, error %s\n",
+			vsi->vsi_num, ice_stat_str(status));
+		return -EIO;
+	}
+
+	sw->dflt_vsi = vsi;
+	sw->dflt_vsi_ena = true;
+
+	return 0;
+}
+
+/**
+ * ice_clear_dflt_vsi - clear the default forwarding VSI
+ * @sw: switch used to clear the default VSI
+ *
+ * If the switch has no default VSI or it's not enabled then return error.
+ *
+ * Otherwise try to clear the default VSI and return the result.
+ */
+int ice_clear_dflt_vsi(struct ice_sw *sw)
+{
+	struct ice_vsi *dflt_vsi;
+	enum ice_status status;
+	struct device *dev;
+
+	if (!sw)
+		return -EINVAL;
+
+	dev = ice_pf_to_dev(sw->pf);
+
+	dflt_vsi = sw->dflt_vsi;
+
+	/* there is no default VSI configured */
+	if (!ice_is_dflt_vsi_in_use(sw))
+		return -ENODEV;
+
+	status = ice_cfg_dflt_vsi(&dflt_vsi->back->hw, dflt_vsi->idx, false,
+				  ICE_FLTR_RX);
+	if (status) {
+		dev_err(dev, "Failed to clear the default forwarding VSI %d, error %s\n",
+			dflt_vsi->vsi_num, ice_stat_str(status));
+		return -EIO;
+	}
+
+	sw->dflt_vsi = NULL;
+	sw->dflt_vsi_ena = false;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
new file mode 100644
index 000000000..3da17895a
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_LIB_H_
+#define _ICE_LIB_H_
+
+#include "ice.h"
+
+const char *ice_vsi_type_str(enum ice_vsi_type vsi_type);
+
+bool ice_pf_state_is_nominal(struct ice_pf *pf);
+
+void ice_update_eth_stats(struct ice_vsi *vsi);
+
+int ice_vsi_cfg_rxqs(struct ice_vsi *vsi);
+
+int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
+
+void ice_vsi_cfg_msix(struct ice_vsi *vsi);
+
+int
+ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid, enum ice_sw_fwd_act_type action);
+
+int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid);
+
+int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi);
+
+int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena);
+
+int ice_vsi_start_all_rx_rings(struct ice_vsi *vsi);
+
+int ice_vsi_stop_all_rx_rings(struct ice_vsi *vsi);
+
+int
+ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+			  u16 rel_vmvf_num);
+
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi);
+
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi);
+
+bool ice_vsi_is_vlan_pruning_ena(struct ice_vsi *vsi);
+
+int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc);
+
+void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
+
+#ifdef CONFIG_DCB
+int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc);
+#endif /* CONFIG_DCB */
+
+struct ice_vsi *
+ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
+	      enum ice_vsi_type vsi_type, u16 vf_id);
+
+void ice_napi_del(struct ice_vsi *vsi);
+
+int ice_vsi_release(struct ice_vsi *vsi);
+
+void ice_vsi_close(struct ice_vsi *vsi);
+
+int ice_ena_vsi(struct ice_vsi *vsi, bool locked);
+
+void ice_dis_vsi(struct ice_vsi *vsi, bool locked);
+
+int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id);
+
+int
+ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id);
+
+int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi);
+
+bool ice_is_reset_in_progress(unsigned long *state);
+
+void
+ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio);
+
+void ice_vsi_dis_irq(struct ice_vsi *vsi);
+
+void ice_vsi_free_irq(struct ice_vsi *vsi);
+
+void ice_vsi_free_rx_rings(struct ice_vsi *vsi);
+
+void ice_vsi_free_tx_rings(struct ice_vsi *vsi);
+
+int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
+
+void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
+
+void ice_update_rx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
+
+void ice_vsi_cfg_frame_size(struct ice_vsi *vsi);
+
+int ice_status_to_errno(enum ice_status err);
+
+u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran);
+
+enum ice_status
+ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set);
+
+bool ice_is_safe_mode(struct ice_pf *pf);
+
+bool ice_is_dflt_vsi_in_use(struct ice_sw *sw);
+
+bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi);
+
+int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi);
+
+int ice_clear_dflt_vsi(struct ice_sw *sw);
+#endif /* !_ICE_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
new file mode 100644
index 000000000..02f72fbec
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -0,0 +1,6906 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* Intel(R) Ethernet Connection E800 Series Linux Driver */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <generated/utsrelease.h>
+#include <linux/crash_dump.h>
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_lib.h"
+#include "ice_fltr.h"
+#include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
+#include "ice_devlink.h"
+
+#define DRV_SUMMARY	"Intel(R) Ethernet Connection E800 Series Linux Driver"
+static const char ice_driver_string[] = DRV_SUMMARY;
+static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";
+
+/* DDP Package file located in firmware search paths (e.g. /lib/firmware/) */
+#define ICE_DDP_PKG_PATH	"intel/ice/ddp/"
+#define ICE_DDP_PKG_FILE	ICE_DDP_PKG_PATH "ice.pkg"
+
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION(DRV_SUMMARY);
+MODULE_LICENSE("GPL v2");
+MODULE_FIRMWARE(ICE_DDP_PKG_FILE);
+
+static int debug = -1;
+module_param(debug, int, 0644);
+#ifndef CONFIG_DYNAMIC_DEBUG
+MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)");
+#else
+MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
+#endif /* !CONFIG_DYNAMIC_DEBUG */
+
+static struct workqueue_struct *ice_wq;
+static const struct net_device_ops ice_netdev_safe_mode_ops;
+static const struct net_device_ops ice_netdev_ops;
+static int ice_vsi_open(struct ice_vsi *vsi);
+
+static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type);
+
+static void ice_vsi_release_all(struct ice_pf *pf);
+
+/**
+ * ice_get_tx_pending - returns number of Tx descriptors not processed
+ * @ring: the ring of descriptors
+ */
+static u16 ice_get_tx_pending(struct ice_ring *ring)
+{
+	u16 head, tail;
+
+	head = ring->next_to_clean;
+	tail = ring->next_to_use;
+
+	if (head != tail)
+		return (head < tail) ?
+			tail - head : (tail + ring->count - head);
+	return 0;
+}
+
+/**
+ * ice_check_for_hang_subtask - check for and recover hung queues
+ * @pf: pointer to PF struct
+ */
+static void ice_check_for_hang_subtask(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi = NULL;
+	struct ice_hw *hw;
+	unsigned int i;
+	int packets;
+	u32 v;
+
+	ice_for_each_vsi(pf, v)
+		if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) {
+			vsi = pf->vsi[v];
+			break;
+		}
+
+	if (!vsi || test_bit(__ICE_DOWN, vsi->state))
+		return;
+
+	if (!(vsi->netdev && netif_carrier_ok(vsi->netdev)))
+		return;
+
+	hw = &vsi->back->hw;
+
+	for (i = 0; i < vsi->num_txq; i++) {
+		struct ice_ring *tx_ring = vsi->tx_rings[i];
+
+		if (tx_ring && tx_ring->desc) {
+			/* If packet counter has not changed the queue is
+			 * likely stalled, so force an interrupt for this
+			 * queue.
+			 *
+			 * prev_pkt would be negative if there was no
+			 * pending work.
+			 */
+			packets = tx_ring->stats.pkts & INT_MAX;
+			if (tx_ring->tx_stats.prev_pkt == packets) {
+				/* Trigger sw interrupt to revive the queue */
+				ice_trigger_sw_intr(hw, tx_ring->q_vector);
+				continue;
+			}
+
+			/* Memory barrier between read of packet count and call
+			 * to ice_get_tx_pending()
+			 */
+			smp_rmb();
+			tx_ring->tx_stats.prev_pkt =
+			    ice_get_tx_pending(tx_ring) ? packets : -1;
+		}
+	}
+}
+
+/**
+ * ice_init_mac_fltr - Set initial MAC filters
+ * @pf: board private structure
+ *
+ * Set initial set of MAC filters for PF VSI; configure filters for permanent
+ * address and broadcast address. If an error is encountered, netdevice will be
+ * unregistered.
+ */
+static int ice_init_mac_fltr(struct ice_pf *pf)
+{
+	enum ice_status status;
+	struct ice_vsi *vsi;
+	u8 *perm_addr;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return -EINVAL;
+
+	perm_addr = vsi->port_info->mac.perm_addr;
+	status = ice_fltr_add_mac_and_broadcast(vsi, perm_addr, ICE_FWD_TO_VSI);
+	if (!status)
+		return 0;
+
+	/* We aren't useful with no MAC filters, so unregister if we
+	 * had an error
+	 */
+	if (vsi->netdev->reg_state == NETREG_REGISTERED) {
+		dev_err(ice_pf_to_dev(pf), "Could not add MAC filters error %s. Unregistering device\n",
+			ice_stat_str(status));
+		unregister_netdev(vsi->netdev);
+		free_netdev(vsi->netdev);
+		vsi->netdev = NULL;
+	}
+
+	return -EIO;
+}
+
+/**
+ * ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
+ * @netdev: the net device on which the sync is happening
+ * @addr: MAC address to sync
+ *
+ * This is a callback function which is called by the in kernel device sync
+ * functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only
+ * populates the tmp_sync_list, which is later used by ice_add_mac to add the
+ * MAC filters from the hardware.
+ */
+static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr,
+				     ICE_FWD_TO_VSI))
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * ice_add_mac_to_unsync_list - creates list of MAC addresses to be unsynced
+ * @netdev: the net device on which the unsync is happening
+ * @addr: MAC address to unsync
+ *
+ * This is a callback function which is called by the in kernel device unsync
+ * functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only
+ * populates the tmp_unsync_list, which is later used by ice_remove_mac to
+ * delete the MAC filters from the hardware.
+ */
+static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	/* Under some circumstances, we might receive a request to delete our
+	 * own device address from our uc list. Because we store the device
+	 * address in the VSI's MAC filter list, we need to ignore such
+	 * requests and not delete our device address from this list.
+	 */
+	if (ether_addr_equal(addr, netdev->dev_addr))
+		return 0;
+
+	if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr,
+				     ICE_FWD_TO_VSI))
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * ice_vsi_fltr_changed - check if filter state changed
+ * @vsi: VSI to be checked
+ *
+ * returns true if filter state has changed, false otherwise.
+ */
+static bool ice_vsi_fltr_changed(struct ice_vsi *vsi)
+{
+	return test_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags) ||
+	       test_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags) ||
+	       test_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
+}
+
+/**
+ * ice_cfg_promisc - Enable or disable promiscuous mode for a given PF
+ * @vsi: the VSI being configured
+ * @promisc_m: mask of promiscuous config bits
+ * @set_promisc: enable or disable promisc flag request
+ *
+ */
+static int ice_cfg_promisc(struct ice_vsi *vsi, u8 promisc_m, bool set_promisc)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	enum ice_status status = 0;
+
+	if (vsi->type != ICE_VSI_PF)
+		return 0;
+
+	if (vsi->vlan_ena) {
+		status = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_m,
+						  set_promisc);
+	} else {
+		if (set_promisc)
+			status = ice_set_vsi_promisc(hw, vsi->idx, promisc_m,
+						     0);
+		else
+			status = ice_clear_vsi_promisc(hw, vsi->idx, promisc_m,
+						       0);
+	}
+
+	if (status)
+		return -EIO;
+
+	return 0;
+}
+
+/**
+ * ice_vsi_sync_fltr - Update the VSI filter list to the HW
+ * @vsi: ptr to the VSI
+ *
+ * Push any outstanding VSI filter changes through the AdminQ.
+ */
+static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
+{
+	struct device *dev = ice_pf_to_dev(vsi->back);
+	struct net_device *netdev = vsi->netdev;
+	bool promisc_forced_on = false;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status = 0;
+	u32 changed_flags = 0;
+	u8 promisc_m;
+	int err = 0;
+
+	if (!vsi->netdev)
+		return -EINVAL;
+
+	while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state))
+		usleep_range(1000, 2000);
+
+	changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
+	vsi->current_netdev_flags = vsi->netdev->flags;
+
+	INIT_LIST_HEAD(&vsi->tmp_sync_list);
+	INIT_LIST_HEAD(&vsi->tmp_unsync_list);
+
+	if (ice_vsi_fltr_changed(vsi)) {
+		clear_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
+		clear_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
+		clear_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
+
+		/* grab the netdev's addr_list_lock */
+		netif_addr_lock_bh(netdev);
+		__dev_uc_sync(netdev, ice_add_mac_to_sync_list,
+			      ice_add_mac_to_unsync_list);
+		__dev_mc_sync(netdev, ice_add_mac_to_sync_list,
+			      ice_add_mac_to_unsync_list);
+		/* our temp lists are populated. release lock */
+		netif_addr_unlock_bh(netdev);
+	}
+
+	/* Remove MAC addresses in the unsync list */
+	status = ice_fltr_remove_mac_list(vsi, &vsi->tmp_unsync_list);
+	ice_fltr_free_list(dev, &vsi->tmp_unsync_list);
+	if (status) {
+		netdev_err(netdev, "Failed to delete MAC filters\n");
+		/* if we failed because of alloc failures, just bail */
+		if (status == ICE_ERR_NO_MEMORY) {
+			err = -ENOMEM;
+			goto out;
+		}
+	}
+
+	/* Add MAC addresses in the sync list */
+	status = ice_fltr_add_mac_list(vsi, &vsi->tmp_sync_list);
+	ice_fltr_free_list(dev, &vsi->tmp_sync_list);
+	/* If filter is added successfully or already exists, do not go into
+	 * 'if' condition and report it as error. Instead continue processing
+	 * rest of the function.
+	 */
+	if (status && status != ICE_ERR_ALREADY_EXISTS) {
+		netdev_err(netdev, "Failed to add MAC filters\n");
+		/* If there is no more space for new umac filters, VSI
+		 * should go into promiscuous mode. There should be some
+		 * space reserved for promiscuous filters.
+		 */
+		if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC &&
+		    !test_and_set_bit(__ICE_FLTR_OVERFLOW_PROMISC,
+				      vsi->state)) {
+			promisc_forced_on = true;
+			netdev_warn(netdev, "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
+				    vsi->vsi_num);
+		} else {
+			err = -EIO;
+			goto out;
+		}
+	}
+	/* check for changes in promiscuous modes */
+	if (changed_flags & IFF_ALLMULTI) {
+		if (vsi->current_netdev_flags & IFF_ALLMULTI) {
+			if (vsi->vlan_ena)
+				promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
+			else
+				promisc_m = ICE_MCAST_PROMISC_BITS;
+
+			err = ice_cfg_promisc(vsi, promisc_m, true);
+			if (err) {
+				netdev_err(netdev, "Error setting Multicast promiscuous mode on VSI %i\n",
+					   vsi->vsi_num);
+				vsi->current_netdev_flags &= ~IFF_ALLMULTI;
+				goto out_promisc;
+			}
+		} else {
+			/* !(vsi->current_netdev_flags & IFF_ALLMULTI) */
+			if (vsi->vlan_ena)
+				promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
+			else
+				promisc_m = ICE_MCAST_PROMISC_BITS;
+
+			err = ice_cfg_promisc(vsi, promisc_m, false);
+			if (err) {
+				netdev_err(netdev, "Error clearing Multicast promiscuous mode on VSI %i\n",
+					   vsi->vsi_num);
+				vsi->current_netdev_flags |= IFF_ALLMULTI;
+				goto out_promisc;
+			}
+		}
+	}
+
+	if (((changed_flags & IFF_PROMISC) || promisc_forced_on) ||
+	    test_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags)) {
+		clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags);
+		if (vsi->current_netdev_flags & IFF_PROMISC) {
+			/* Apply Rx filter rule to get traffic from wire */
+			if (!ice_is_dflt_vsi_in_use(pf->first_sw)) {
+				err = ice_set_dflt_vsi(pf->first_sw, vsi);
+				if (err && err != -EEXIST) {
+					netdev_err(netdev, "Error %d setting default VSI %i Rx rule\n",
+						   err, vsi->vsi_num);
+					vsi->current_netdev_flags &=
+						~IFF_PROMISC;
+					goto out_promisc;
+				}
+				ice_cfg_vlan_pruning(vsi, false, false);
+			}
+		} else {
+			/* Clear Rx filter to remove traffic from wire */
+			if (ice_is_vsi_dflt_vsi(pf->first_sw, vsi)) {
+				err = ice_clear_dflt_vsi(pf->first_sw);
+				if (err) {
+					netdev_err(netdev, "Error %d clearing default VSI %i Rx rule\n",
+						   err, vsi->vsi_num);
+					vsi->current_netdev_flags |=
+						IFF_PROMISC;
+					goto out_promisc;
+				}
+				if (vsi->num_vlan > 1)
+					ice_cfg_vlan_pruning(vsi, true, false);
+			}
+		}
+	}
+	goto exit;
+
+out_promisc:
+	set_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags);
+	goto exit;
+out:
+	/* if something went wrong then set the changed flag so we try again */
+	set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
+	set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
+exit:
+	clear_bit(__ICE_CFG_BUSY, vsi->state);
+	return err;
+}
+
+/**
+ * ice_sync_fltr_subtask - Sync the VSI filter list with HW
+ * @pf: board private structure
+ */
+static void ice_sync_fltr_subtask(struct ice_pf *pf)
+{
+	int v;
+
+	if (!pf || !(test_bit(ICE_FLAG_FLTR_SYNC, pf->flags)))
+		return;
+
+	clear_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
+
+	ice_for_each_vsi(pf, v)
+		if (pf->vsi[v] && ice_vsi_fltr_changed(pf->vsi[v]) &&
+		    ice_vsi_sync_fltr(pf->vsi[v])) {
+			/* come back and try again later */
+			set_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
+			break;
+		}
+}
+
+/**
+ * ice_pf_dis_all_vsi - Pause all VSIs on a PF
+ * @pf: the PF
+ * @locked: is the rtnl_lock already held
+ */
+static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
+{
+	int v;
+
+	ice_for_each_vsi(pf, v)
+		if (pf->vsi[v])
+			ice_dis_vsi(pf->vsi[v], locked);
+}
+
+/**
+ * ice_prepare_for_reset - prep for the core to reset
+ * @pf: board private structure
+ *
+ * Inform or close all dependent features in prep for reset.
+ */
+static void
+ice_prepare_for_reset(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	unsigned int i;
+
+	/* already prepared for reset */
+	if (test_bit(__ICE_PREPARED_FOR_RESET, pf->state))
+		return;
+
+	/* Notify VFs of impending reset */
+	if (ice_check_sq_alive(hw, &hw->mailboxq))
+		ice_vc_notify_reset(pf);
+
+	/* Disable VFs until reset is completed */
+	ice_for_each_vf(pf, i)
+		ice_set_vf_state_qs_dis(&pf->vf[i]);
+
+	/* clear SW filtering DB */
+	ice_clear_hw_tbls(hw);
+	/* disable the VSIs and their queues that are not already DOWN */
+	ice_pf_dis_all_vsi(pf, false);
+
+	if (hw->port_info)
+		ice_sched_clear_port(hw->port_info);
+
+	ice_shutdown_all_ctrlq(hw);
+
+	set_bit(__ICE_PREPARED_FOR_RESET, pf->state);
+}
+
+/**
+ * ice_do_reset - Initiate one of many types of resets
+ * @pf: board private structure
+ * @reset_type: reset type requested
+ * before this function was called.
+ */
+static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+
+	dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
+
+	ice_prepare_for_reset(pf);
+
+	/* trigger the reset */
+	if (ice_reset(hw, reset_type)) {
+		dev_err(dev, "reset %d failed\n", reset_type);
+		set_bit(__ICE_RESET_FAILED, pf->state);
+		clear_bit(__ICE_RESET_OICR_RECV, pf->state);
+		clear_bit(__ICE_PREPARED_FOR_RESET, pf->state);
+		clear_bit(__ICE_PFR_REQ, pf->state);
+		clear_bit(__ICE_CORER_REQ, pf->state);
+		clear_bit(__ICE_GLOBR_REQ, pf->state);
+		return;
+	}
+
+	/* PFR is a bit of a special case because it doesn't result in an OICR
+	 * interrupt. So for PFR, rebuild after the reset and clear the reset-
+	 * associated state bits.
+	 */
+	if (reset_type == ICE_RESET_PFR) {
+		pf->pfr_count++;
+		ice_rebuild(pf, reset_type);
+		clear_bit(__ICE_PREPARED_FOR_RESET, pf->state);
+		clear_bit(__ICE_PFR_REQ, pf->state);
+		ice_reset_all_vfs(pf, true);
+	}
+}
+
+/**
+ * ice_reset_subtask - Set up for resetting the device and driver
+ * @pf: board private structure
+ */
+static void ice_reset_subtask(struct ice_pf *pf)
+{
+	enum ice_reset_req reset_type = ICE_RESET_INVAL;
+
+	/* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an
+	 * OICR interrupt. The OICR handler (ice_misc_intr) determines what type
+	 * of reset is pending and sets bits in pf->state indicating the reset
+	 * type and __ICE_RESET_OICR_RECV. So, if the latter bit is set
+	 * prepare for pending reset if not already (for PF software-initiated
+	 * global resets the software should already be prepared for it as
+	 * indicated by __ICE_PREPARED_FOR_RESET; for global resets initiated
+	 * by firmware or software on other PFs, that bit is not set so prepare
+	 * for the reset now), poll for reset done, rebuild and return.
+	 */
+	if (test_bit(__ICE_RESET_OICR_RECV, pf->state)) {
+		/* Perform the largest reset requested */
+		if (test_and_clear_bit(__ICE_CORER_RECV, pf->state))
+			reset_type = ICE_RESET_CORER;
+		if (test_and_clear_bit(__ICE_GLOBR_RECV, pf->state))
+			reset_type = ICE_RESET_GLOBR;
+		if (test_and_clear_bit(__ICE_EMPR_RECV, pf->state))
+			reset_type = ICE_RESET_EMPR;
+		/* return if no valid reset type requested */
+		if (reset_type == ICE_RESET_INVAL)
+			return;
+		ice_prepare_for_reset(pf);
+
+		/* make sure we are ready to rebuild */
+		if (ice_check_reset(&pf->hw)) {
+			set_bit(__ICE_RESET_FAILED, pf->state);
+		} else {
+			/* done with reset. start rebuild */
+			pf->hw.reset_ongoing = false;
+			ice_rebuild(pf, reset_type);
+			/* clear bit to resume normal operations, but
+			 * ICE_NEEDS_RESTART bit is set in case rebuild failed
+			 */
+			clear_bit(__ICE_RESET_OICR_RECV, pf->state);
+			clear_bit(__ICE_PREPARED_FOR_RESET, pf->state);
+			clear_bit(__ICE_PFR_REQ, pf->state);
+			clear_bit(__ICE_CORER_REQ, pf->state);
+			clear_bit(__ICE_GLOBR_REQ, pf->state);
+			ice_reset_all_vfs(pf, true);
+		}
+
+		return;
+	}
+
+	/* No pending resets to finish processing. Check for new resets */
+	if (test_bit(__ICE_PFR_REQ, pf->state))
+		reset_type = ICE_RESET_PFR;
+	if (test_bit(__ICE_CORER_REQ, pf->state))
+		reset_type = ICE_RESET_CORER;
+	if (test_bit(__ICE_GLOBR_REQ, pf->state))
+		reset_type = ICE_RESET_GLOBR;
+	/* If no valid reset type requested just return */
+	if (reset_type == ICE_RESET_INVAL)
+		return;
+
+	/* reset if not already down or busy */
+	if (!test_bit(__ICE_DOWN, pf->state) &&
+	    !test_bit(__ICE_CFG_BUSY, pf->state)) {
+		ice_do_reset(pf, reset_type);
+	}
+}
+
+/**
+ * ice_print_topo_conflict - print topology conflict message
+ * @vsi: the VSI whose topology status is being checked
+ */
+static void ice_print_topo_conflict(struct ice_vsi *vsi)
+{
+	switch (vsi->port_info->phy.link_info.topo_media_conflict) {
+	case ICE_AQ_LINK_TOPO_CONFLICT:
+	case ICE_AQ_LINK_MEDIA_CONFLICT:
+	case ICE_AQ_LINK_TOPO_UNREACH_PRT:
+	case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT:
+	case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA:
+		netdev_info(vsi->netdev, "Possible mis-configuration of the Ethernet port detected, please use the Intel(R) Ethernet Port Configuration Tool application to address the issue.\n");
+		break;
+	case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA:
+		netdev_info(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * ice_print_link_msg - print link up or down message
+ * @vsi: the VSI whose link status is being queried
+ * @isup: boolean for if the link is now up or down
+ */
+void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
+{
+	struct ice_aqc_get_phy_caps_data *caps;
+	const char *an_advertised;
+	enum ice_status status;
+	const char *fec_req;
+	const char *speed;
+	const char *fec;
+	const char *fc;
+	const char *an;
+
+	if (!vsi)
+		return;
+
+	if (vsi->current_isup == isup)
+		return;
+
+	vsi->current_isup = isup;
+
+	if (!isup) {
+		netdev_info(vsi->netdev, "NIC Link is Down\n");
+		return;
+	}
+
+	switch (vsi->port_info->phy.link_info.link_speed) {
+	case ICE_AQ_LINK_SPEED_100GB:
+		speed = "100 G";
+		break;
+	case ICE_AQ_LINK_SPEED_50GB:
+		speed = "50 G";
+		break;
+	case ICE_AQ_LINK_SPEED_40GB:
+		speed = "40 G";
+		break;
+	case ICE_AQ_LINK_SPEED_25GB:
+		speed = "25 G";
+		break;
+	case ICE_AQ_LINK_SPEED_20GB:
+		speed = "20 G";
+		break;
+	case ICE_AQ_LINK_SPEED_10GB:
+		speed = "10 G";
+		break;
+	case ICE_AQ_LINK_SPEED_5GB:
+		speed = "5 G";
+		break;
+	case ICE_AQ_LINK_SPEED_2500MB:
+		speed = "2.5 G";
+		break;
+	case ICE_AQ_LINK_SPEED_1000MB:
+		speed = "1 G";
+		break;
+	case ICE_AQ_LINK_SPEED_100MB:
+		speed = "100 M";
+		break;
+	default:
+		speed = "Unknown";
+		break;
+	}
+
+	switch (vsi->port_info->fc.current_mode) {
+	case ICE_FC_FULL:
+		fc = "Rx/Tx";
+		break;
+	case ICE_FC_TX_PAUSE:
+		fc = "Tx";
+		break;
+	case ICE_FC_RX_PAUSE:
+		fc = "Rx";
+		break;
+	case ICE_FC_NONE:
+		fc = "None";
+		break;
+	default:
+		fc = "Unknown";
+		break;
+	}
+
+	/* Get FEC mode based on negotiated link info */
+	switch (vsi->port_info->phy.link_info.fec_info) {
+	case ICE_AQ_LINK_25G_RS_528_FEC_EN:
+	case ICE_AQ_LINK_25G_RS_544_FEC_EN:
+		fec = "RS-FEC";
+		break;
+	case ICE_AQ_LINK_25G_KR_FEC_EN:
+		fec = "FC-FEC/BASE-R";
+		break;
+	default:
+		fec = "NONE";
+		break;
+	}
+
+	/* check if autoneg completed, might be false due to not supported */
+	if (vsi->port_info->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
+		an = "True";
+	else
+		an = "False";
+
+	/* Get FEC mode requested based on PHY caps last SW configuration */
+	caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+	if (!caps) {
+		fec_req = "Unknown";
+		an_advertised = "Unknown";
+		goto done;
+	}
+
+	status = ice_aq_get_phy_caps(vsi->port_info, false,
+				     ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
+	if (status)
+		netdev_info(vsi->netdev, "Get phy capability failed.\n");
+
+	an_advertised = ice_is_phy_caps_an_enabled(caps) ? "On" : "Off";
+
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
+		fec_req = "RS-FEC";
+	else if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+		 caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+		fec_req = "FC-FEC/BASE-R";
+	else
+		fec_req = "NONE";
+
+	kfree(caps);
+
+done:
+	netdev_info(vsi->netdev, "NIC Link is up %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg Advertised: %s, Autoneg Negotiated: %s, Flow Control: %s\n",
+		    speed, fec_req, fec, an_advertised, an, fc);
+	ice_print_topo_conflict(vsi);
+}
+
+/**
+ * ice_vsi_link_event - update the VSI's netdev
+ * @vsi: the VSI on which the link event occurred
+ * @link_up: whether or not the VSI needs to be set up or down
+ */
+static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
+{
+	if (!vsi)
+		return;
+
+	if (test_bit(__ICE_DOWN, vsi->state) || !vsi->netdev)
+		return;
+
+	if (vsi->type == ICE_VSI_PF) {
+		if (link_up == netif_carrier_ok(vsi->netdev))
+			return;
+
+		if (link_up) {
+			netif_carrier_on(vsi->netdev);
+			netif_tx_wake_all_queues(vsi->netdev);
+		} else {
+			netif_carrier_off(vsi->netdev);
+			netif_tx_stop_all_queues(vsi->netdev);
+		}
+	}
+}
+
+/**
+ * ice_set_dflt_mib - send a default config MIB to the FW
+ * @pf: private PF struct
+ *
+ * This function sends a default configuration MIB to the FW.
+ *
+ * If this function errors out at any point, the driver is still able to
+ * function.  The main impact is that LFC may not operate as expected.
+ * Therefore an error state in this function should be treated with a DBG
+ * message and continue on with driver rebuild/reenable.
+ */
+static void ice_set_dflt_mib(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	u8 mib_type, *buf, *lldpmib = NULL;
+	u16 len, typelen, offset = 0;
+	struct ice_lldp_org_tlv *tlv;
+	struct ice_hw *hw;
+	u32 ouisubtype;
+
+	if (!pf) {
+		dev_dbg(dev, "%s NULL pf pointer\n", __func__);
+		return;
+	}
+
+	hw = &pf->hw;
+	mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
+	lldpmib = kzalloc(ICE_LLDPDU_SIZE, GFP_KERNEL);
+	if (!lldpmib) {
+		dev_dbg(dev, "%s Failed to allocate MIB memory\n",
+			__func__);
+		return;
+	}
+
+	/* Add ETS CFG TLV */
+	tlv = (struct ice_lldp_org_tlv *)lldpmib;
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_IEEE_ETS_TLV_LEN);
+	tlv->typelen = htons(typelen);
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_ETS_CFG);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	buf = tlv->tlvinfo;
+	buf[0] = 0;
+
+	/* ETS CFG all UPs map to TC 0. Next 4 (1 - 4) Octets = 0.
+	 * Octets 5 - 12 are BW values, set octet 5 to 100% BW.
+	 * Octets 13 - 20 are TSA values - leave as zeros
+	 */
+	buf[5] = 0x64;
+	len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
+	offset += len + 2;
+	tlv = (struct ice_lldp_org_tlv *)
+		((char *)tlv + sizeof(tlv->typelen) + len);
+
+	/* Add ETS REC TLV */
+	buf = tlv->tlvinfo;
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_ETS_REC);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* First octet of buf is reserved
+	 * Octets 1 - 4 map UP to TC - all UPs map to zero
+	 * Octets 5 - 12 are BW values - set TC 0 to 100%.
+	 * Octets 13 - 20 are TSA value - leave as zeros
+	 */
+	buf[5] = 0x64;
+	offset += len + 2;
+	tlv = (struct ice_lldp_org_tlv *)
+		((char *)tlv + sizeof(tlv->typelen) + len);
+
+	/* Add PFC CFG TLV */
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_IEEE_PFC_TLV_LEN);
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_PFC_CFG);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* Octet 1 left as all zeros - PFC disabled */
+	buf[0] = 0x08;
+	len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
+	offset += len + 2;
+
+	if (ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, offset, NULL))
+		dev_dbg(dev, "%s Failed to set default LLDP MIB\n", __func__);
+
+	kfree(lldpmib);
+}
+
+/**
+ * ice_link_event - process the link event
+ * @pf: PF that the link event is associated with
+ * @pi: port_info for the port that the link event is associated with
+ * @link_up: true if the physical link is up and false if it is down
+ * @link_speed: current link speed received from the link event
+ *
+ * Returns 0 on success and negative on failure
+ */
+static int
+ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
+	       u16 link_speed)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_phy_info *phy_info;
+	struct ice_vsi *vsi;
+	u16 old_link_speed;
+	bool old_link;
+	int result;
+
+	phy_info = &pi->phy;
+	phy_info->link_info_old = phy_info->link_info;
+
+	old_link = !!(phy_info->link_info_old.link_info & ICE_AQ_LINK_UP);
+	old_link_speed = phy_info->link_info_old.link_speed;
+
+	/* update the link info structures and re-enable link events,
+	 * don't bail on failure due to other book keeping needed
+	 */
+	result = ice_update_link_info(pi);
+	if (result)
+		dev_dbg(dev, "Failed to update link status and re-enable link events for port %d\n",
+			pi->lport);
+
+	/* Check if the link state is up after updating link info, and treat
+	 * this event as an UP event since the link is actually UP now.
+	 */
+	if (phy_info->link_info.link_info & ICE_AQ_LINK_UP)
+		link_up = true;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi || !vsi->port_info)
+		return -EINVAL;
+
+	/* turn off PHY if media was removed */
+	if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) &&
+	    !(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
+		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+
+		result = ice_aq_set_link_restart_an(pi, false, NULL);
+		if (result) {
+			dev_dbg(dev, "Failed to set link down, VSI %d error %d\n",
+				vsi->vsi_num, result);
+			return result;
+		}
+	}
+
+	/* if the old link up/down and speed is the same as the new */
+	if (link_up == old_link && link_speed == old_link_speed)
+		return result;
+
+	if (ice_is_dcb_active(pf)) {
+		if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+			ice_dcb_rebuild(pf);
+	} else {
+		if (link_up)
+			ice_set_dflt_mib(pf);
+	}
+	ice_vsi_link_event(vsi, link_up);
+	ice_print_link_msg(vsi, link_up);
+
+	ice_vc_notify_link_state(pf);
+
+	return result;
+}
+
+/**
+ * ice_watchdog_subtask - periodic tasks not using event driven scheduling
+ * @pf: board private structure
+ */
+static void ice_watchdog_subtask(struct ice_pf *pf)
+{
+	int i;
+
+	/* if interface is down do nothing */
+	if (test_bit(__ICE_DOWN, pf->state) ||
+	    test_bit(__ICE_CFG_BUSY, pf->state))
+		return;
+
+	/* make sure we don't do these things too often */
+	if (time_before(jiffies,
+			pf->serv_tmr_prev + pf->serv_tmr_period))
+		return;
+
+	pf->serv_tmr_prev = jiffies;
+
+	/* Update the stats for active netdevs so the network stack
+	 * can look at updated numbers whenever it cares to
+	 */
+	ice_update_pf_stats(pf);
+	ice_for_each_vsi(pf, i)
+		if (pf->vsi[i] && pf->vsi[i]->netdev)
+			ice_update_vsi_stats(pf->vsi[i]);
+}
+
+/**
+ * ice_init_link_events - enable/initialize link events
+ * @pi: pointer to the port_info instance
+ *
+ * Returns -EIO on failure, 0 on success
+ */
+static int ice_init_link_events(struct ice_port_info *pi)
+{
+	u16 mask;
+
+	mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA |
+		       ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL));
+
+	if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) {
+		dev_dbg(ice_hw_to_dev(pi->hw), "Failed to set link event mask for port %d\n",
+			pi->lport);
+		return -EIO;
+	}
+
+	if (ice_aq_get_link_info(pi, true, NULL, NULL)) {
+		dev_dbg(ice_hw_to_dev(pi->hw), "Failed to enable link events for port %d\n",
+			pi->lport);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_handle_link_event - handle link event via ARQ
+ * @pf: PF that the link event is associated with
+ * @event: event structure containing link status info
+ */
+static int
+ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event)
+{
+	struct ice_aqc_get_link_status_data *link_data;
+	struct ice_port_info *port_info;
+	int status;
+
+	link_data = (struct ice_aqc_get_link_status_data *)event->msg_buf;
+	port_info = pf->hw.port_info;
+	if (!port_info)
+		return -EINVAL;
+
+	status = ice_link_event(pf, port_info,
+				!!(link_data->link_info & ICE_AQ_LINK_UP),
+				le16_to_cpu(link_data->link_speed));
+	if (status)
+		dev_dbg(ice_pf_to_dev(pf), "Could not process link event, error %d\n",
+			status);
+
+	return status;
+}
+
+enum ice_aq_task_state {
+	ICE_AQ_TASK_WAITING = 0,
+	ICE_AQ_TASK_COMPLETE,
+	ICE_AQ_TASK_CANCELED,
+};
+
+struct ice_aq_task {
+	struct hlist_node entry;
+
+	u16 opcode;
+	struct ice_rq_event_info *event;
+	enum ice_aq_task_state state;
+};
+
+/**
+ * ice_wait_for_aq_event - Wait for an AdminQ event from firmware
+ * @pf: pointer to the PF private structure
+ * @opcode: the opcode to wait for
+ * @timeout: how long to wait, in jiffies
+ * @event: storage for the event info
+ *
+ * Waits for a specific AdminQ completion event on the ARQ for a given PF. The
+ * current thread will be put to sleep until the specified event occurs or
+ * until the given timeout is reached.
+ *
+ * To obtain only the descriptor contents, pass an event without an allocated
+ * msg_buf. If the complete data buffer is desired, allocate the
+ * event->msg_buf with enough space ahead of time.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
+			  struct ice_rq_event_info *event)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_aq_task *task;
+	unsigned long start;
+	long ret;
+	int err;
+
+	task = kzalloc(sizeof(*task), GFP_KERNEL);
+	if (!task)
+		return -ENOMEM;
+
+	INIT_HLIST_NODE(&task->entry);
+	task->opcode = opcode;
+	task->event = event;
+	task->state = ICE_AQ_TASK_WAITING;
+
+	spin_lock_bh(&pf->aq_wait_lock);
+	hlist_add_head(&task->entry, &pf->aq_wait_list);
+	spin_unlock_bh(&pf->aq_wait_lock);
+
+	start = jiffies;
+
+	ret = wait_event_interruptible_timeout(pf->aq_wait_queue, task->state,
+					       timeout);
+	switch (task->state) {
+	case ICE_AQ_TASK_WAITING:
+		err = ret < 0 ? ret : -ETIMEDOUT;
+		break;
+	case ICE_AQ_TASK_CANCELED:
+		err = ret < 0 ? ret : -ECANCELED;
+		break;
+	case ICE_AQ_TASK_COMPLETE:
+		err = ret < 0 ? ret : 0;
+		break;
+	default:
+		WARN(1, "Unexpected AdminQ wait task state %u", task->state);
+		err = -EINVAL;
+		break;
+	}
+
+	dev_dbg(dev, "Waited %u msecs (max %u msecs) for firmware response to op 0x%04x\n",
+		jiffies_to_msecs(jiffies - start),
+		jiffies_to_msecs(timeout),
+		opcode);
+
+	spin_lock_bh(&pf->aq_wait_lock);
+	hlist_del(&task->entry);
+	spin_unlock_bh(&pf->aq_wait_lock);
+	kfree(task);
+
+	return err;
+}
+
+/**
+ * ice_aq_check_events - Check if any thread is waiting for an AdminQ event
+ * @pf: pointer to the PF private structure
+ * @opcode: the opcode of the event
+ * @event: the event to check
+ *
+ * Loops over the current list of pending threads waiting for an AdminQ event.
+ * For each matching task, copy the contents of the event into the task
+ * structure and wake up the thread.
+ *
+ * If multiple threads wait for the same opcode, they will all be woken up.
+ *
+ * Note that event->msg_buf will only be duplicated if the event has a buffer
+ * with enough space already allocated. Otherwise, only the descriptor and
+ * message length will be copied.
+ *
+ * Returns: true if an event was found, false otherwise
+ */
+static void ice_aq_check_events(struct ice_pf *pf, u16 opcode,
+				struct ice_rq_event_info *event)
+{
+	struct ice_rq_event_info *task_ev;
+	struct ice_aq_task *task;
+	bool found = false;
+
+	spin_lock_bh(&pf->aq_wait_lock);
+	hlist_for_each_entry(task, &pf->aq_wait_list, entry) {
+		if (task->state || task->opcode != opcode)
+			continue;
+
+		task_ev = task->event;
+		memcpy(&task_ev->desc, &event->desc, sizeof(event->desc));
+		task_ev->msg_len = event->msg_len;
+
+		/* Only copy the data buffer if a destination was set */
+		if (task_ev->msg_buf && task_ev->buf_len >= event->buf_len) {
+			memcpy(task_ev->msg_buf, event->msg_buf,
+			       event->buf_len);
+			task_ev->buf_len = event->buf_len;
+		}
+
+		task->state = ICE_AQ_TASK_COMPLETE;
+		found = true;
+	}
+	spin_unlock_bh(&pf->aq_wait_lock);
+
+	if (found)
+		wake_up(&pf->aq_wait_queue);
+}
+
+/**
+ * ice_aq_cancel_waiting_tasks - Immediately cancel all waiting tasks
+ * @pf: the PF private structure
+ *
+ * Set all waiting tasks to ICE_AQ_TASK_CANCELED, and wake up their threads.
+ * This will then cause ice_aq_wait_for_event to exit with -ECANCELED.
+ */
+static void ice_aq_cancel_waiting_tasks(struct ice_pf *pf)
+{
+	struct ice_aq_task *task;
+
+	spin_lock_bh(&pf->aq_wait_lock);
+	hlist_for_each_entry(task, &pf->aq_wait_list, entry)
+		task->state = ICE_AQ_TASK_CANCELED;
+	spin_unlock_bh(&pf->aq_wait_lock);
+
+	wake_up(&pf->aq_wait_queue);
+}
+
+/**
+ * __ice_clean_ctrlq - helper function to clean controlq rings
+ * @pf: ptr to struct ice_pf
+ * @q_type: specific Control queue type
+ */
+static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_rq_event_info event;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_ctl_q_info *cq;
+	u16 pending, i = 0;
+	const char *qtype;
+	u32 oldval, val;
+
+	/* Do not clean control queue if/when PF reset fails */
+	if (test_bit(__ICE_RESET_FAILED, pf->state))
+		return 0;
+
+	switch (q_type) {
+	case ICE_CTL_Q_ADMIN:
+		cq = &hw->adminq;
+		qtype = "Admin";
+		break;
+	case ICE_CTL_Q_MAILBOX:
+		cq = &hw->mailboxq;
+		qtype = "Mailbox";
+		break;
+	default:
+		dev_warn(dev, "Unknown control queue type 0x%x\n", q_type);
+		return 0;
+	}
+
+	/* check for error indications - PF_xx_AxQLEN register layout for
+	 * FW/MBX/SB are identical so just use defines for PF_FW_AxQLEN.
+	 */
+	val = rd32(hw, cq->rq.len);
+	if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
+		   PF_FW_ARQLEN_ARQCRIT_M)) {
+		oldval = val;
+		if (val & PF_FW_ARQLEN_ARQVFE_M)
+			dev_dbg(dev, "%s Receive Queue VF Error detected\n",
+				qtype);
+		if (val & PF_FW_ARQLEN_ARQOVFL_M) {
+			dev_dbg(dev, "%s Receive Queue Overflow Error detected\n",
+				qtype);
+		}
+		if (val & PF_FW_ARQLEN_ARQCRIT_M)
+			dev_dbg(dev, "%s Receive Queue Critical Error detected\n",
+				qtype);
+		val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
+			 PF_FW_ARQLEN_ARQCRIT_M);
+		if (oldval != val)
+			wr32(hw, cq->rq.len, val);
+	}
+
+	val = rd32(hw, cq->sq.len);
+	if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
+		   PF_FW_ATQLEN_ATQCRIT_M)) {
+		oldval = val;
+		if (val & PF_FW_ATQLEN_ATQVFE_M)
+			dev_dbg(dev, "%s Send Queue VF Error detected\n",
+				qtype);
+		if (val & PF_FW_ATQLEN_ATQOVFL_M) {
+			dev_dbg(dev, "%s Send Queue Overflow Error detected\n",
+				qtype);
+		}
+		if (val & PF_FW_ATQLEN_ATQCRIT_M)
+			dev_dbg(dev, "%s Send Queue Critical Error detected\n",
+				qtype);
+		val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
+			 PF_FW_ATQLEN_ATQCRIT_M);
+		if (oldval != val)
+			wr32(hw, cq->sq.len, val);
+	}
+
+	event.buf_len = cq->rq_buf_size;
+	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
+	if (!event.msg_buf)
+		return 0;
+
+	do {
+		enum ice_status ret;
+		u16 opcode;
+
+		ret = ice_clean_rq_elem(hw, cq, &event, &pending);
+		if (ret == ICE_ERR_AQ_NO_WORK)
+			break;
+		if (ret) {
+			dev_err(dev, "%s Receive Queue event error %s\n", qtype,
+				ice_stat_str(ret));
+			break;
+		}
+
+		opcode = le16_to_cpu(event.desc.opcode);
+
+		/* Notify any thread that might be waiting for this event */
+		ice_aq_check_events(pf, opcode, &event);
+
+		switch (opcode) {
+		case ice_aqc_opc_get_link_status:
+			if (ice_handle_link_event(pf, &event))
+				dev_err(dev, "Could not handle link event\n");
+			break;
+		case ice_aqc_opc_event_lan_overflow:
+			ice_vf_lan_overflow_event(pf, &event);
+			break;
+		case ice_mbx_opc_send_msg_to_pf:
+			ice_vc_process_vf_msg(pf, &event);
+			break;
+		case ice_aqc_opc_fw_logging:
+			ice_output_fw_log(hw, &event.desc, event.msg_buf);
+			break;
+		case ice_aqc_opc_lldp_set_mib_change:
+			ice_dcb_process_lldp_set_mib_change(pf, &event);
+			break;
+		default:
+			dev_dbg(dev, "%s Receive Queue unknown event 0x%04x ignored\n",
+				qtype, opcode);
+			break;
+		}
+	} while (pending && (i++ < ICE_DFLT_IRQ_WORK));
+
+	kfree(event.msg_buf);
+
+	return pending && (i == ICE_DFLT_IRQ_WORK);
+}
+
+/**
+ * ice_ctrlq_pending - check if there is a difference between ntc and ntu
+ * @hw: pointer to hardware info
+ * @cq: control queue information
+ *
+ * returns true if there are pending messages in a queue, false if there aren't
+ */
+static bool ice_ctrlq_pending(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	u16 ntu;
+
+	ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
+	return cq->rq.next_to_clean != ntu;
+}
+
+/**
+ * ice_clean_adminq_subtask - clean the AdminQ rings
+ * @pf: board private structure
+ */
+static void ice_clean_adminq_subtask(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	if (!test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state))
+		return;
+
+	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN))
+		return;
+
+	clear_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state);
+
+	/* There might be a situation where new messages arrive to a control
+	 * queue between processing the last message and clearing the
+	 * EVENT_PENDING bit. So before exiting, check queue head again (using
+	 * ice_ctrlq_pending) and process new messages if any.
+	 */
+	if (ice_ctrlq_pending(hw, &hw->adminq))
+		__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_clean_mailboxq_subtask - clean the MailboxQ rings
+ * @pf: board private structure
+ */
+static void ice_clean_mailboxq_subtask(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	if (!test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state))
+		return;
+
+	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX))
+		return;
+
+	clear_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state);
+
+	if (ice_ctrlq_pending(hw, &hw->mailboxq))
+		__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_service_task_schedule - schedule the service task to wake up
+ * @pf: board private structure
+ *
+ * If not already scheduled, this puts the task into the work queue.
+ */
+void ice_service_task_schedule(struct ice_pf *pf)
+{
+	if (!test_bit(__ICE_SERVICE_DIS, pf->state) &&
+	    !test_and_set_bit(__ICE_SERVICE_SCHED, pf->state) &&
+	    !test_bit(__ICE_NEEDS_RESTART, pf->state))
+		queue_work(ice_wq, &pf->serv_task);
+}
+
+/**
+ * ice_service_task_complete - finish up the service task
+ * @pf: board private structure
+ */
+static void ice_service_task_complete(struct ice_pf *pf)
+{
+	WARN_ON(!test_bit(__ICE_SERVICE_SCHED, pf->state));
+
+	/* force memory (pf->state) to sync before next service task */
+	smp_mb__before_atomic();
+	clear_bit(__ICE_SERVICE_SCHED, pf->state);
+}
+
+/**
+ * ice_service_task_stop - stop service task and cancel works
+ * @pf: board private structure
+ *
+ * Return 0 if the __ICE_SERVICE_DIS bit was not already set,
+ * 1 otherwise.
+ */
+static int ice_service_task_stop(struct ice_pf *pf)
+{
+	int ret;
+
+	ret = test_and_set_bit(__ICE_SERVICE_DIS, pf->state);
+
+	if (pf->serv_tmr.function)
+		del_timer_sync(&pf->serv_tmr);
+	if (pf->serv_task.func)
+		cancel_work_sync(&pf->serv_task);
+
+	clear_bit(__ICE_SERVICE_SCHED, pf->state);
+	return ret;
+}
+
+/**
+ * ice_service_task_restart - restart service task and schedule works
+ * @pf: board private structure
+ *
+ * This function is needed for suspend and resume works (e.g WoL scenario)
+ */
+static void ice_service_task_restart(struct ice_pf *pf)
+{
+	clear_bit(__ICE_SERVICE_DIS, pf->state);
+	ice_service_task_schedule(pf);
+}
+
+/**
+ * ice_service_timer - timer callback to schedule service task
+ * @t: pointer to timer_list
+ */
+static void ice_service_timer(struct timer_list *t)
+{
+	struct ice_pf *pf = from_timer(pf, t, serv_tmr);
+
+	mod_timer(&pf->serv_tmr, round_jiffies(pf->serv_tmr_period + jiffies));
+	ice_service_task_schedule(pf);
+}
+
+/**
+ * ice_handle_mdd_event - handle malicious driver detect event
+ * @pf: pointer to the PF structure
+ *
+ * Called from service task. OICR interrupt handler indicates MDD event.
+ * VF MDD logging is guarded by net_ratelimit. Additional PF and VF log
+ * messages are wrapped by netif_msg_[rx|tx]_err. Since VF Rx MDD events
+ * disable the queue, the PF can be configured to reset the VF using ethtool
+ * private flag mdd-auto-reset-vf.
+ */
+static void ice_handle_mdd_event(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	unsigned int i;
+	u32 reg;
+
+	if (!test_and_clear_bit(__ICE_MDD_EVENT_PENDING, pf->state)) {
+		/* Since the VF MDD event logging is rate limited, check if
+		 * there are pending MDD events.
+		 */
+		ice_print_vfs_mdd_events(pf);
+		return;
+	}
+
+	/* find what triggered an MDD event */
+	reg = rd32(hw, GL_MDET_TX_PQM);
+	if (reg & GL_MDET_TX_PQM_VALID_M) {
+		u8 pf_num = (reg & GL_MDET_TX_PQM_PF_NUM_M) >>
+				GL_MDET_TX_PQM_PF_NUM_S;
+		u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >>
+				GL_MDET_TX_PQM_VF_NUM_S;
+		u8 event = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >>
+				GL_MDET_TX_PQM_MAL_TYPE_S;
+		u16 queue = ((reg & GL_MDET_TX_PQM_QNUM_M) >>
+				GL_MDET_TX_PQM_QNUM_S);
+
+		if (netif_msg_tx_err(pf))
+			dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
+				 event, queue, pf_num, vf_num);
+		wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
+	}
+
+	reg = rd32(hw, GL_MDET_TX_TCLAN);
+	if (reg & GL_MDET_TX_TCLAN_VALID_M) {
+		u8 pf_num = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >>
+				GL_MDET_TX_TCLAN_PF_NUM_S;
+		u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >>
+				GL_MDET_TX_TCLAN_VF_NUM_S;
+		u8 event = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >>
+				GL_MDET_TX_TCLAN_MAL_TYPE_S;
+		u16 queue = ((reg & GL_MDET_TX_TCLAN_QNUM_M) >>
+				GL_MDET_TX_TCLAN_QNUM_S);
+
+		if (netif_msg_tx_err(pf))
+			dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
+				 event, queue, pf_num, vf_num);
+		wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);
+	}
+
+	reg = rd32(hw, GL_MDET_RX);
+	if (reg & GL_MDET_RX_VALID_M) {
+		u8 pf_num = (reg & GL_MDET_RX_PF_NUM_M) >>
+				GL_MDET_RX_PF_NUM_S;
+		u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >>
+				GL_MDET_RX_VF_NUM_S;
+		u8 event = (reg & GL_MDET_RX_MAL_TYPE_M) >>
+				GL_MDET_RX_MAL_TYPE_S;
+		u16 queue = ((reg & GL_MDET_RX_QNUM_M) >>
+				GL_MDET_RX_QNUM_S);
+
+		if (netif_msg_rx_err(pf))
+			dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
+				 event, queue, pf_num, vf_num);
+		wr32(hw, GL_MDET_RX, 0xffffffff);
+	}
+
+	/* check to see if this PF caused an MDD event */
+	reg = rd32(hw, PF_MDET_TX_PQM);
+	if (reg & PF_MDET_TX_PQM_VALID_M) {
+		wr32(hw, PF_MDET_TX_PQM, 0xFFFF);
+		if (netif_msg_tx_err(pf))
+			dev_info(dev, "Malicious Driver Detection event TX_PQM detected on PF\n");
+	}
+
+	reg = rd32(hw, PF_MDET_TX_TCLAN);
+	if (reg & PF_MDET_TX_TCLAN_VALID_M) {
+		wr32(hw, PF_MDET_TX_TCLAN, 0xFFFF);
+		if (netif_msg_tx_err(pf))
+			dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on PF\n");
+	}
+
+	reg = rd32(hw, PF_MDET_RX);
+	if (reg & PF_MDET_RX_VALID_M) {
+		wr32(hw, PF_MDET_RX, 0xFFFF);
+		if (netif_msg_rx_err(pf))
+			dev_info(dev, "Malicious Driver Detection event RX detected on PF\n");
+	}
+
+	/* Check to see if one of the VFs caused an MDD event, and then
+	 * increment counters and set print pending
+	 */
+	ice_for_each_vf(pf, i) {
+		struct ice_vf *vf = &pf->vf[i];
+
+		reg = rd32(hw, VP_MDET_TX_PQM(i));
+		if (reg & VP_MDET_TX_PQM_VALID_M) {
+			wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF);
+			vf->mdd_tx_events.count++;
+			set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state);
+			if (netif_msg_tx_err(pf))
+				dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n",
+					 i);
+		}
+
+		reg = rd32(hw, VP_MDET_TX_TCLAN(i));
+		if (reg & VP_MDET_TX_TCLAN_VALID_M) {
+			wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF);
+			vf->mdd_tx_events.count++;
+			set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state);
+			if (netif_msg_tx_err(pf))
+				dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n",
+					 i);
+		}
+
+		reg = rd32(hw, VP_MDET_TX_TDPU(i));
+		if (reg & VP_MDET_TX_TDPU_VALID_M) {
+			wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF);
+			vf->mdd_tx_events.count++;
+			set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state);
+			if (netif_msg_tx_err(pf))
+				dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n",
+					 i);
+		}
+
+		reg = rd32(hw, VP_MDET_RX(i));
+		if (reg & VP_MDET_RX_VALID_M) {
+			wr32(hw, VP_MDET_RX(i), 0xFFFF);
+			vf->mdd_rx_events.count++;
+			set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state);
+			if (netif_msg_rx_err(pf))
+				dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n",
+					 i);
+
+			/* Since the queue is disabled on VF Rx MDD events, the
+			 * PF can be configured to reset the VF through ethtool
+			 * private flag mdd-auto-reset-vf.
+			 */
+			if (test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
+				/* VF MDD event counters will be cleared by
+				 * reset, so print the event prior to reset.
+				 */
+				ice_print_vf_rx_mdd_event(vf);
+				mutex_lock(&pf->vf[i].cfg_lock);
+				ice_reset_vf(&pf->vf[i], false);
+				mutex_unlock(&pf->vf[i].cfg_lock);
+			}
+		}
+	}
+
+	ice_print_vfs_mdd_events(pf);
+}
+
+/**
+ * ice_force_phys_link_state - Force the physical link state
+ * @vsi: VSI to force the physical link state to up/down
+ * @link_up: true/false indicates to set the physical link to up/down
+ *
+ * Force the physical link state by getting the current PHY capabilities from
+ * hardware and setting the PHY config based on the determined capabilities. If
+ * link changes a link event will be triggered because both the Enable Automatic
+ * Link Update and LESM Enable bits are set when setting the PHY capabilities.
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
+{
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	struct ice_aqc_set_phy_cfg_data *cfg;
+	struct ice_port_info *pi;
+	struct device *dev;
+	int retcode;
+
+	if (!vsi || !vsi->port_info || !vsi->back)
+		return -EINVAL;
+	if (vsi->type != ICE_VSI_PF)
+		return 0;
+
+	dev = ice_pf_to_dev(vsi->back);
+
+	pi = vsi->port_info;
+
+	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return -ENOMEM;
+
+	retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
+				      NULL);
+	if (retcode) {
+		dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n",
+			vsi->vsi_num, retcode);
+		retcode = -EIO;
+		goto out;
+	}
+
+	/* No change in link */
+	if (link_up == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) &&
+	    link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP))
+		goto out;
+
+	/* Use the current user PHY configuration. The current user PHY
+	 * configuration is initialized during probe from PHY capabilities
+	 * software mode, and updated on set PHY configuration.
+	 */
+	cfg = kmemdup(&pi->phy.curr_user_phy_cfg, sizeof(*cfg), GFP_KERNEL);
+	if (!cfg) {
+		retcode = -ENOMEM;
+		goto out;
+	}
+
+	cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+	if (link_up)
+		cfg->caps |= ICE_AQ_PHY_ENA_LINK;
+	else
+		cfg->caps &= ~ICE_AQ_PHY_ENA_LINK;
+
+	retcode = ice_aq_set_phy_cfg(&vsi->back->hw, pi, cfg, NULL);
+	if (retcode) {
+		dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
+			vsi->vsi_num, retcode);
+		retcode = -EIO;
+	}
+
+	kfree(cfg);
+out:
+	kfree(pcaps);
+	return retcode;
+}
+
+/**
+ * ice_init_nvm_phy_type - Initialize the NVM PHY type
+ * @pi: port info structure
+ *
+ * Initialize nvm_phy_type_[low|high] for link lenient mode support
+ */
+static int ice_init_nvm_phy_type(struct ice_port_info *pi)
+{
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	struct ice_pf *pf = pi->hw->back;
+	enum ice_status status;
+	int err = 0;
+
+	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return -ENOMEM;
+
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA, pcaps,
+				     NULL);
+
+	if (status) {
+		dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
+		err = -EIO;
+		goto out;
+	}
+
+	pf->nvm_phy_type_hi = pcaps->phy_type_high;
+	pf->nvm_phy_type_lo = pcaps->phy_type_low;
+
+out:
+	kfree(pcaps);
+	return err;
+}
+
+/**
+ * ice_init_link_dflt_override - Initialize link default override
+ * @pi: port info structure
+ *
+ * Initialize link default override and PHY total port shutdown during probe
+ */
+static void ice_init_link_dflt_override(struct ice_port_info *pi)
+{
+	struct ice_link_default_override_tlv *ldo;
+	struct ice_pf *pf = pi->hw->back;
+
+	ldo = &pf->link_dflt_override;
+	if (ice_get_link_default_override(ldo, pi))
+		return;
+
+	if (!(ldo->options & ICE_LINK_OVERRIDE_PORT_DIS))
+		return;
+
+	/* Enable Total Port Shutdown (override/replace link-down-on-close
+	 * ethtool private flag) for ports with Port Disable bit set.
+	 */
+	set_bit(ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags);
+	set_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags);
+}
+
+/**
+ * ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings
+ * @pi: port info structure
+ *
+ * If default override is enabled, initialized the user PHY cfg speed and FEC
+ * settings using the default override mask from the NVM.
+ *
+ * The PHY should only be configured with the default override settings the
+ * first time media is available. The __ICE_LINK_DEFAULT_OVERRIDE_PENDING state
+ * is used to indicate that the user PHY cfg default override is initialized
+ * and the PHY has not been configured with the default override settings. The
+ * state is set here, and cleared in ice_configure_phy the first time the PHY is
+ * configured.
+ */
+static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi)
+{
+	struct ice_link_default_override_tlv *ldo;
+	struct ice_aqc_set_phy_cfg_data *cfg;
+	struct ice_phy_info *phy = &pi->phy;
+	struct ice_pf *pf = pi->hw->back;
+
+	ldo = &pf->link_dflt_override;
+
+	/* If link default override is enabled, use to mask NVM PHY capabilities
+	 * for speed and FEC default configuration.
+	 */
+	cfg = &phy->curr_user_phy_cfg;
+
+	if (ldo->phy_type_low || ldo->phy_type_high) {
+		cfg->phy_type_low = pf->nvm_phy_type_lo &
+				    cpu_to_le64(ldo->phy_type_low);
+		cfg->phy_type_high = pf->nvm_phy_type_hi &
+				     cpu_to_le64(ldo->phy_type_high);
+	}
+	cfg->link_fec_opt = ldo->fec_options;
+	phy->curr_user_fec_req = ICE_FEC_AUTO;
+
+	set_bit(__ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state);
+}
+
+/**
+ * ice_init_phy_user_cfg - Initialize the PHY user configuration
+ * @pi: port info structure
+ *
+ * Initialize the current user PHY configuration, speed, FEC, and FC requested
+ * mode to default. The PHY defaults are from get PHY capabilities topology
+ * with media so call when media is first available. An error is returned if
+ * called when media is not available. The PHY initialization completed state is
+ * set here.
+ *
+ * These configurations are used when setting PHY
+ * configuration. The user PHY configuration is updated on set PHY
+ * configuration. Returns 0 on success, negative on failure
+ */
+static int ice_init_phy_user_cfg(struct ice_port_info *pi)
+{
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	struct ice_phy_info *phy = &pi->phy;
+	struct ice_pf *pf = pi->hw->back;
+	enum ice_status status;
+	struct ice_vsi *vsi;
+	int err = 0;
+
+	if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
+		return -EIO;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return -EINVAL;
+
+	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return -ENOMEM;
+
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps,
+				     NULL);
+	if (status) {
+		dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
+		err = -EIO;
+		goto err_out;
+	}
+
+	ice_copy_phy_caps_to_cfg(pi, pcaps, &pi->phy.curr_user_phy_cfg);
+
+	/* check if lenient mode is supported and enabled */
+	if (ice_fw_supports_link_override(&vsi->back->hw) &&
+	    !(pcaps->module_compliance_enforcement &
+	      ICE_AQC_MOD_ENFORCE_STRICT_MODE)) {
+		set_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags);
+
+		/* if link default override is enabled, initialize user PHY
+		 * configuration with link default override values
+		 */
+		if (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN) {
+			ice_init_phy_cfg_dflt_override(pi);
+			goto out;
+		}
+	}
+
+	/* if link default override is not enabled, initialize PHY using
+	 * topology with media
+	 */
+	phy->curr_user_fec_req = ice_caps_to_fec_mode(pcaps->caps,
+						      pcaps->link_fec_options);
+	phy->curr_user_fc_req = ice_caps_to_fc_mode(pcaps->caps);
+
+out:
+	phy->curr_user_speed_req = ICE_AQ_LINK_SPEED_M;
+	set_bit(__ICE_PHY_INIT_COMPLETE, pf->state);
+err_out:
+	kfree(pcaps);
+	return err;
+}
+
+/**
+ * ice_configure_phy - configure PHY
+ * @vsi: VSI of PHY
+ *
+ * Set the PHY configuration. If the current PHY configuration is the same as
+ * the curr_user_phy_cfg, then do nothing to avoid link flap. Otherwise
+ * configure the based get PHY capabilities for topology with media.
+ */
+static int ice_configure_phy(struct ice_vsi *vsi)
+{
+	struct device *dev = ice_pf_to_dev(vsi->back);
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	struct ice_aqc_set_phy_cfg_data *cfg;
+	struct ice_port_info *pi;
+	enum ice_status status;
+	int err = 0;
+
+	pi = vsi->port_info;
+	if (!pi)
+		return -EINVAL;
+
+	/* Ensure we have media as we cannot configure a medialess port */
+	if (!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
+		return -EPERM;
+
+	ice_print_topo_conflict(vsi);
+
+	if (vsi->port_info->phy.link_info.topo_media_conflict ==
+	    ICE_AQ_LINK_TOPO_UNSUPP_MEDIA)
+		return -EPERM;
+
+	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags))
+		return ice_force_phys_link_state(vsi, true);
+
+	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return -ENOMEM;
+
+	/* Get current PHY config */
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
+				     NULL);
+	if (status) {
+		dev_err(dev, "Failed to get PHY configuration, VSI %d error %s\n",
+			vsi->vsi_num, ice_stat_str(status));
+		err = -EIO;
+		goto done;
+	}
+
+	/* If PHY enable link is configured and configuration has not changed,
+	 * there's nothing to do
+	 */
+	if (pcaps->caps & ICE_AQC_PHY_EN_LINK &&
+	    ice_phy_caps_equals_cfg(pcaps, &pi->phy.curr_user_phy_cfg))
+		goto done;
+
+	/* Use PHY topology as baseline for configuration */
+	memset(pcaps, 0, sizeof(*pcaps));
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps,
+				     NULL);
+	if (status) {
+		dev_err(dev, "Failed to get PHY topology, VSI %d error %s\n",
+			vsi->vsi_num, ice_stat_str(status));
+		err = -EIO;
+		goto done;
+	}
+
+	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+	if (!cfg) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	ice_copy_phy_caps_to_cfg(pi, pcaps, cfg);
+
+	/* Speed - If default override pending, use curr_user_phy_cfg set in
+	 * ice_init_phy_user_cfg_ldo.
+	 */
+	if (test_and_clear_bit(__ICE_LINK_DEFAULT_OVERRIDE_PENDING,
+			       vsi->back->state)) {
+		cfg->phy_type_low = pi->phy.curr_user_phy_cfg.phy_type_low;
+		cfg->phy_type_high = pi->phy.curr_user_phy_cfg.phy_type_high;
+	} else {
+		u64 phy_low = 0, phy_high = 0;
+
+		ice_update_phy_type(&phy_low, &phy_high,
+				    pi->phy.curr_user_speed_req);
+		cfg->phy_type_low = pcaps->phy_type_low & cpu_to_le64(phy_low);
+		cfg->phy_type_high = pcaps->phy_type_high &
+				     cpu_to_le64(phy_high);
+	}
+
+	/* Can't provide what was requested; use PHY capabilities */
+	if (!cfg->phy_type_low && !cfg->phy_type_high) {
+		cfg->phy_type_low = pcaps->phy_type_low;
+		cfg->phy_type_high = pcaps->phy_type_high;
+	}
+
+	/* FEC */
+	ice_cfg_phy_fec(pi, cfg, pi->phy.curr_user_fec_req);
+
+	/* Can't provide what was requested; use PHY capabilities */
+	if (cfg->link_fec_opt !=
+	    (cfg->link_fec_opt & pcaps->link_fec_options)) {
+		cfg->caps |= pcaps->caps & ICE_AQC_PHY_EN_AUTO_FEC;
+		cfg->link_fec_opt = pcaps->link_fec_options;
+	}
+
+	/* Flow Control - always supported; no need to check against
+	 * capabilities
+	 */
+	ice_cfg_phy_fc(pi, cfg, pi->phy.curr_user_fc_req);
+
+	/* Enable link and link update */
+	cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK;
+
+	status = ice_aq_set_phy_cfg(&vsi->back->hw, pi, cfg, NULL);
+	if (status) {
+		dev_err(dev, "Failed to set phy config, VSI %d error %s\n",
+			vsi->vsi_num, ice_stat_str(status));
+		err = -EIO;
+	}
+
+	kfree(cfg);
+done:
+	kfree(pcaps);
+	return err;
+}
+
+/**
+ * ice_check_media_subtask - Check for media
+ * @pf: pointer to PF struct
+ *
+ * If media is available, then initialize PHY user configuration if it is not
+ * been, and configure the PHY if the interface is up.
+ */
+static void ice_check_media_subtask(struct ice_pf *pf)
+{
+	struct ice_port_info *pi;
+	struct ice_vsi *vsi;
+	int err;
+
+	/* No need to check for media if it's already present */
+	if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags))
+		return;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return;
+
+	/* Refresh link info and check if media is present */
+	pi = vsi->port_info;
+	err = ice_update_link_info(pi);
+	if (err)
+		return;
+
+	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
+		if (!test_bit(__ICE_PHY_INIT_COMPLETE, pf->state))
+			ice_init_phy_user_cfg(pi);
+
+		/* PHY settings are reset on media insertion, reconfigure
+		 * PHY to preserve settings.
+		 */
+		if (test_bit(__ICE_DOWN, vsi->state) &&
+		    test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags))
+			return;
+
+		err = ice_configure_phy(vsi);
+		if (!err)
+			clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+
+		/* A Link Status Event will be generated; the event handler
+		 * will complete bringing the interface up
+		 */
+	}
+}
+
+/**
+ * ice_service_task - manage and run subtasks
+ * @work: pointer to work_struct contained by the PF struct
+ */
+static void ice_service_task(struct work_struct *work)
+{
+	struct ice_pf *pf = container_of(work, struct ice_pf, serv_task);
+	unsigned long start_time = jiffies;
+
+	/* subtasks */
+
+	/* process reset requests first */
+	ice_reset_subtask(pf);
+
+	/* bail if a reset/recovery cycle is pending or rebuild failed */
+	if (ice_is_reset_in_progress(pf->state) ||
+	    test_bit(__ICE_SUSPENDED, pf->state) ||
+	    test_bit(__ICE_NEEDS_RESTART, pf->state)) {
+		ice_service_task_complete(pf);
+		return;
+	}
+
+	ice_clean_adminq_subtask(pf);
+	ice_check_media_subtask(pf);
+	ice_check_for_hang_subtask(pf);
+	ice_sync_fltr_subtask(pf);
+	ice_handle_mdd_event(pf);
+	ice_watchdog_subtask(pf);
+
+	if (ice_is_safe_mode(pf)) {
+		ice_service_task_complete(pf);
+		return;
+	}
+
+	ice_process_vflr_event(pf);
+	ice_clean_mailboxq_subtask(pf);
+	ice_sync_arfs_fltrs(pf);
+	/* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */
+	ice_service_task_complete(pf);
+
+	/* If the tasks have taken longer than one service timer period
+	 * or there is more work to be done, reset the service timer to
+	 * schedule the service task now.
+	 */
+	if (time_after(jiffies, (start_time + pf->serv_tmr_period)) ||
+	    test_bit(__ICE_MDD_EVENT_PENDING, pf->state) ||
+	    test_bit(__ICE_VFLR_EVENT_PENDING, pf->state) ||
+	    test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state) ||
+	    test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state))
+		mod_timer(&pf->serv_tmr, jiffies);
+}
+
+/**
+ * ice_set_ctrlq_len - helper function to set controlq length
+ * @hw: pointer to the HW instance
+ */
+static void ice_set_ctrlq_len(struct ice_hw *hw)
+{
+	hw->adminq.num_rq_entries = ICE_AQ_LEN;
+	hw->adminq.num_sq_entries = ICE_AQ_LEN;
+	hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
+	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
+	hw->mailboxq.num_rq_entries = PF_MBX_ARQLEN_ARQLEN_M;
+	hw->mailboxq.num_sq_entries = ICE_MBXSQ_LEN;
+	hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
+	hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
+}
+
+/**
+ * ice_schedule_reset - schedule a reset
+ * @pf: board private structure
+ * @reset: reset being requested
+ */
+int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+
+	/* bail out if earlier reset has failed */
+	if (test_bit(__ICE_RESET_FAILED, pf->state)) {
+		dev_dbg(dev, "earlier reset has failed\n");
+		return -EIO;
+	}
+	/* bail if reset/recovery already in progress */
+	if (ice_is_reset_in_progress(pf->state)) {
+		dev_dbg(dev, "Reset already in progress\n");
+		return -EBUSY;
+	}
+
+	switch (reset) {
+	case ICE_RESET_PFR:
+		set_bit(__ICE_PFR_REQ, pf->state);
+		break;
+	case ICE_RESET_CORER:
+		set_bit(__ICE_CORER_REQ, pf->state);
+		break;
+	case ICE_RESET_GLOBR:
+		set_bit(__ICE_GLOBR_REQ, pf->state);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ice_service_task_schedule(pf);
+	return 0;
+}
+
+/**
+ * ice_irq_affinity_notify - Callback for affinity changes
+ * @notify: context as to what irq was changed
+ * @mask: the new affinity mask
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * so that we may register to receive changes to the irq affinity masks.
+ */
+static void
+ice_irq_affinity_notify(struct irq_affinity_notify *notify,
+			const cpumask_t *mask)
+{
+	struct ice_q_vector *q_vector =
+		container_of(notify, struct ice_q_vector, affinity_notify);
+
+	cpumask_copy(&q_vector->affinity_mask, mask);
+}
+
+/**
+ * ice_irq_affinity_release - Callback for affinity notifier release
+ * @ref: internal core kernel usage
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * to inform the current notification subscriber that they will no longer
+ * receive notifications.
+ */
+static void ice_irq_affinity_release(struct kref __always_unused *ref) {}
+
+/**
+ * ice_vsi_ena_irq - Enable IRQ for the given VSI
+ * @vsi: the VSI being configured
+ */
+static int ice_vsi_ena_irq(struct ice_vsi *vsi)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	int i;
+
+	ice_for_each_q_vector(vsi, i)
+		ice_irq_dynamic_ena(hw, vsi, vsi->q_vectors[i]);
+
+	ice_flush(hw);
+	return 0;
+}
+
+/**
+ * ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI
+ * @vsi: the VSI being configured
+ * @basename: name for the vector
+ */
+static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
+{
+	int q_vectors = vsi->num_q_vectors;
+	struct ice_pf *pf = vsi->back;
+	int base = vsi->base_vector;
+	struct device *dev;
+	int rx_int_idx = 0;
+	int tx_int_idx = 0;
+	int vector, err;
+	int irq_num;
+
+	dev = ice_pf_to_dev(pf);
+	for (vector = 0; vector < q_vectors; vector++) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[vector];
+
+		irq_num = pf->msix_entries[base + vector].vector;
+
+		if (q_vector->tx.ring && q_vector->rx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+				 "%s-%s-%d", basename, "TxRx", rx_int_idx++);
+			tx_int_idx++;
+		} else if (q_vector->rx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+				 "%s-%s-%d", basename, "rx", rx_int_idx++);
+		} else if (q_vector->tx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+				 "%s-%s-%d", basename, "tx", tx_int_idx++);
+		} else {
+			/* skip this unused q_vector */
+			continue;
+		}
+		err = devm_request_irq(dev, irq_num, vsi->irq_handler, 0,
+				       q_vector->name, q_vector);
+		if (err) {
+			netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n",
+				   err);
+			goto free_q_irqs;
+		}
+
+		/* register for affinity change notifications */
+		if (!IS_ENABLED(CONFIG_RFS_ACCEL)) {
+			struct irq_affinity_notify *affinity_notify;
+
+			affinity_notify = &q_vector->affinity_notify;
+			affinity_notify->notify = ice_irq_affinity_notify;
+			affinity_notify->release = ice_irq_affinity_release;
+			irq_set_affinity_notifier(irq_num, affinity_notify);
+		}
+
+		/* assign the mask for this irq */
+		irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
+	}
+
+	vsi->irqs_ready = true;
+	return 0;
+
+free_q_irqs:
+	while (vector) {
+		vector--;
+		irq_num = pf->msix_entries[base + vector].vector;
+		if (!IS_ENABLED(CONFIG_RFS_ACCEL))
+			irq_set_affinity_notifier(irq_num, NULL);
+		irq_set_affinity_hint(irq_num, NULL);
+		devm_free_irq(dev, irq_num, &vsi->q_vectors[vector]);
+	}
+	return err;
+}
+
+/**
+ * ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP
+ * @vsi: VSI to setup Tx rings used by XDP
+ *
+ * Return 0 on success and negative value on error
+ */
+static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
+{
+	struct device *dev = ice_pf_to_dev(vsi->back);
+	int i;
+
+	for (i = 0; i < vsi->num_xdp_txq; i++) {
+		u16 xdp_q_idx = vsi->alloc_txq + i;
+		struct ice_ring *xdp_ring;
+
+		xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL);
+
+		if (!xdp_ring)
+			goto free_xdp_rings;
+
+		xdp_ring->q_index = xdp_q_idx;
+		xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
+		xdp_ring->ring_active = false;
+		xdp_ring->vsi = vsi;
+		xdp_ring->netdev = NULL;
+		xdp_ring->dev = dev;
+		xdp_ring->count = vsi->num_tx_desc;
+		WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
+		if (ice_setup_tx_ring(xdp_ring))
+			goto free_xdp_rings;
+		ice_set_ring_xdp(xdp_ring);
+		xdp_ring->xsk_pool = ice_xsk_pool(xdp_ring);
+	}
+
+	return 0;
+
+free_xdp_rings:
+	for (; i >= 0; i--)
+		if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc)
+			ice_free_tx_ring(vsi->xdp_rings[i]);
+	return -ENOMEM;
+}
+
+/**
+ * ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
+ * @vsi: VSI to set the bpf prog on
+ * @prog: the bpf prog pointer
+ */
+static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+	struct bpf_prog *old_prog;
+	int i;
+
+	old_prog = xchg(&vsi->xdp_prog, prog);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	ice_for_each_rxq(vsi, i)
+		WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+}
+
+/**
+ * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
+ * @vsi: VSI to bring up Tx rings used by XDP
+ * @prog: bpf program that will be assigned to VSI
+ *
+ * Return 0 on success and negative value on error
+ */
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	int xdp_rings_rem = vsi->num_xdp_txq;
+	struct ice_pf *pf = vsi->back;
+	struct ice_qs_cfg xdp_qs_cfg = {
+		.qs_mutex = &pf->avail_q_mutex,
+		.pf_map = pf->avail_txqs,
+		.pf_map_size = pf->max_pf_txqs,
+		.q_count = vsi->num_xdp_txq,
+		.scatter_count = ICE_MAX_SCATTER_TXQS,
+		.vsi_map = vsi->txq_map,
+		.vsi_map_offset = vsi->alloc_txq,
+		.mapping_mode = ICE_VSI_MAP_CONTIG
+	};
+	enum ice_status status;
+	struct device *dev;
+	int i, v_idx;
+
+	dev = ice_pf_to_dev(pf);
+	vsi->xdp_rings = devm_kcalloc(dev, vsi->num_xdp_txq,
+				      sizeof(*vsi->xdp_rings), GFP_KERNEL);
+	if (!vsi->xdp_rings)
+		return -ENOMEM;
+
+	vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode;
+	if (__ice_vsi_get_qs(&xdp_qs_cfg))
+		goto err_map_xdp;
+
+	if (ice_xdp_alloc_setup_rings(vsi))
+		goto clear_xdp_rings;
+
+	/* follow the logic from ice_vsi_map_rings_to_vectors */
+	ice_for_each_q_vector(vsi, v_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+		int xdp_rings_per_v, q_id, q_base;
+
+		xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
+					       vsi->num_q_vectors - v_idx);
+		q_base = vsi->num_xdp_txq - xdp_rings_rem;
+
+		for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
+			struct ice_ring *xdp_ring = vsi->xdp_rings[q_id];
+
+			xdp_ring->q_vector = q_vector;
+			xdp_ring->next = q_vector->tx.ring;
+			q_vector->tx.ring = xdp_ring;
+		}
+		xdp_rings_rem -= xdp_rings_per_v;
+	}
+
+	/* omit the scheduler update if in reset path; XDP queues will be
+	 * taken into account at the end of ice_vsi_rebuild, where
+	 * ice_cfg_vsi_lan is being called
+	 */
+	if (ice_is_reset_in_progress(pf->state))
+		return 0;
+
+	/* tell the Tx scheduler that right now we have
+	 * additional queues
+	 */
+	for (i = 0; i < vsi->tc_cfg.numtc; i++)
+		max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq;
+
+	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+				 max_txqs);
+	if (status) {
+		dev_err(dev, "Failed VSI LAN queue config for XDP, error: %s\n",
+			ice_stat_str(status));
+		goto clear_xdp_rings;
+	}
+
+	/* assign the prog only when it's not already present on VSI;
+	 * this flow is a subject of both ethtool -L and ndo_bpf flows;
+	 * VSI rebuild that happens under ethtool -L can expose us to
+	 * the bpf_prog refcount issues as we would be swapping same
+	 * bpf_prog pointers from vsi->xdp_prog and calling bpf_prog_put
+	 * on it as it would be treated as an 'old_prog'; for ndo_bpf
+	 * this is not harmful as dev_xdp_install bumps the refcount
+	 * before calling the op exposed by the driver;
+	 */
+	if (!ice_is_xdp_ena_vsi(vsi))
+		ice_vsi_assign_bpf_prog(vsi, prog);
+
+	return 0;
+clear_xdp_rings:
+	for (i = 0; i < vsi->num_xdp_txq; i++)
+		if (vsi->xdp_rings[i]) {
+			kfree_rcu(vsi->xdp_rings[i], rcu);
+			vsi->xdp_rings[i] = NULL;
+		}
+
+err_map_xdp:
+	mutex_lock(&pf->avail_q_mutex);
+	for (i = 0; i < vsi->num_xdp_txq; i++) {
+		clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+		vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+	}
+	mutex_unlock(&pf->avail_q_mutex);
+
+	devm_kfree(dev, vsi->xdp_rings);
+	return -ENOMEM;
+}
+
+/**
+ * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
+ * @vsi: VSI to remove XDP rings
+ *
+ * Detach XDP rings from irq vectors, clean up the PF bitmap and free
+ * resources
+ */
+int ice_destroy_xdp_rings(struct ice_vsi *vsi)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	struct ice_pf *pf = vsi->back;
+	int i, v_idx;
+
+	/* q_vectors are freed in reset path so there's no point in detaching
+	 * rings; in case of rebuild being triggered not from reset bits
+	 * in pf->state won't be set, so additionally check first q_vector
+	 * against NULL
+	 */
+	if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+		goto free_qmap;
+
+	ice_for_each_q_vector(vsi, v_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+		struct ice_ring *ring;
+
+		ice_for_each_ring(ring, q_vector->tx)
+			if (!ring->tx_buf || !ice_ring_is_xdp(ring))
+				break;
+
+		/* restore the value of last node prior to XDP setup */
+		q_vector->tx.ring = ring;
+	}
+
+free_qmap:
+	mutex_lock(&pf->avail_q_mutex);
+	for (i = 0; i < vsi->num_xdp_txq; i++) {
+		clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+		vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+	}
+	mutex_unlock(&pf->avail_q_mutex);
+
+	for (i = 0; i < vsi->num_xdp_txq; i++)
+		if (vsi->xdp_rings[i]) {
+			if (vsi->xdp_rings[i]->desc) {
+				synchronize_rcu();
+				ice_free_tx_ring(vsi->xdp_rings[i]);
+			}
+			kfree_rcu(vsi->xdp_rings[i], rcu);
+			vsi->xdp_rings[i] = NULL;
+		}
+
+	devm_kfree(ice_pf_to_dev(pf), vsi->xdp_rings);
+	vsi->xdp_rings = NULL;
+
+	if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+		return 0;
+
+	ice_vsi_assign_bpf_prog(vsi, NULL);
+
+	/* notify Tx scheduler that we destroyed XDP queues and bring
+	 * back the old number of child nodes
+	 */
+	for (i = 0; i < vsi->tc_cfg.numtc; i++)
+		max_txqs[i] = vsi->num_txq;
+
+	/* change number of XDP Tx queues to 0 */
+	vsi->num_xdp_txq = 0;
+
+	return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+			       max_txqs);
+}
+
+/**
+ * ice_xdp_setup_prog - Add or remove XDP eBPF program
+ * @vsi: VSI to setup XDP for
+ * @prog: XDP program
+ * @extack: netlink extended ack
+ */
+static int
+ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
+		   struct netlink_ext_ack *extack)
+{
+	int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
+	bool if_running = netif_running(vsi->netdev);
+	int ret = 0, xdp_ring_err = 0;
+
+	if (frame_size > vsi->rx_buf_len) {
+		NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
+		return -EOPNOTSUPP;
+	}
+
+	/* need to stop netdev while setting up the program for Rx rings */
+	if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) {
+		ret = ice_down(vsi);
+		if (ret) {
+			NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed");
+			return ret;
+		}
+	}
+
+	if (!ice_is_xdp_ena_vsi(vsi) && prog) {
+		vsi->num_xdp_txq = vsi->alloc_rxq;
+		xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
+		if (xdp_ring_err)
+			NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
+	} else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
+		xdp_ring_err = ice_destroy_xdp_rings(vsi);
+		if (xdp_ring_err)
+			NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
+	} else {
+		/* safe to call even when prog == vsi->xdp_prog as
+		 * dev_xdp_install in net/core/dev.c incremented prog's
+		 * refcount so corresponding bpf_prog_put won't cause
+		 * underflow
+		 */
+		ice_vsi_assign_bpf_prog(vsi, prog);
+	}
+
+	if (if_running)
+		ret = ice_up(vsi);
+
+	if (!ret && prog && vsi->xsk_pools) {
+		int i;
+
+		ice_for_each_rxq(vsi, i) {
+			struct ice_ring *rx_ring = vsi->rx_rings[i];
+
+			if (rx_ring->xsk_pool)
+				napi_schedule(&rx_ring->q_vector->napi);
+		}
+	}
+
+	return (ret || xdp_ring_err) ? -ENOMEM : 0;
+}
+
+/**
+ * ice_xdp_safe_mode - XDP handler for safe mode
+ * @dev: netdevice
+ * @xdp: XDP command
+ */
+static int ice_xdp_safe_mode(struct net_device __always_unused *dev,
+			     struct netdev_bpf *xdp)
+{
+	NL_SET_ERR_MSG_MOD(xdp->extack,
+			   "Please provide working DDP firmware package in order to use XDP\n"
+			   "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst");
+	return -EOPNOTSUPP;
+}
+
+/**
+ * ice_xdp - implements XDP handler
+ * @dev: netdevice
+ * @xdp: XDP command
+ */
+static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (vsi->type != ICE_VSI_PF) {
+		NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF VSI");
+		return -EINVAL;
+	}
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
+	case XDP_SETUP_XSK_POOL:
+		return ice_xsk_pool_setup(vsi, xdp->xsk.pool,
+					  xdp->xsk.queue_id);
+	default:
+		return -EINVAL;
+	}
+}
+
+/**
+ * ice_ena_misc_vector - enable the non-queue interrupts
+ * @pf: board private structure
+ */
+static void ice_ena_misc_vector(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+
+	/* Disable anti-spoof detection interrupt to prevent spurious event
+	 * interrupts during a function reset. Anti-spoof functionally is
+	 * still supported.
+	 */
+	val = rd32(hw, GL_MDCK_TX_TDPU);
+	val |= GL_MDCK_TX_TDPU_RCU_ANTISPOOF_ITR_DIS_M;
+	wr32(hw, GL_MDCK_TX_TDPU, val);
+
+	/* clear things first */
+	wr32(hw, PFINT_OICR_ENA, 0);	/* disable all */
+	rd32(hw, PFINT_OICR);		/* read to clear */
+
+	val = (PFINT_OICR_ECC_ERR_M |
+	       PFINT_OICR_MAL_DETECT_M |
+	       PFINT_OICR_GRST_M |
+	       PFINT_OICR_PCI_EXCEPTION_M |
+	       PFINT_OICR_VFLR_M |
+	       PFINT_OICR_HMC_ERR_M |
+	       PFINT_OICR_PE_CRITERR_M);
+
+	wr32(hw, PFINT_OICR_ENA, val);
+
+	/* SW_ITR_IDX = 0, but don't change INTENA */
+	wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
+	     GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M);
+}
+
+/**
+ * ice_misc_intr - misc interrupt handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ */
+static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
+{
+	struct ice_pf *pf = (struct ice_pf *)data;
+	struct ice_hw *hw = &pf->hw;
+	irqreturn_t ret = IRQ_NONE;
+	struct device *dev;
+	u32 oicr, ena_mask;
+
+	dev = ice_pf_to_dev(pf);
+	set_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state);
+	set_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state);
+
+	oicr = rd32(hw, PFINT_OICR);
+	ena_mask = rd32(hw, PFINT_OICR_ENA);
+
+	if (oicr & PFINT_OICR_SWINT_M) {
+		ena_mask &= ~PFINT_OICR_SWINT_M;
+		pf->sw_int_count++;
+	}
+
+	if (oicr & PFINT_OICR_MAL_DETECT_M) {
+		ena_mask &= ~PFINT_OICR_MAL_DETECT_M;
+		set_bit(__ICE_MDD_EVENT_PENDING, pf->state);
+	}
+	if (oicr & PFINT_OICR_VFLR_M) {
+		/* disable any further VFLR event notifications */
+		if (test_bit(__ICE_VF_RESETS_DISABLED, pf->state)) {
+			u32 reg = rd32(hw, PFINT_OICR_ENA);
+
+			reg &= ~PFINT_OICR_VFLR_M;
+			wr32(hw, PFINT_OICR_ENA, reg);
+		} else {
+			ena_mask &= ~PFINT_OICR_VFLR_M;
+			set_bit(__ICE_VFLR_EVENT_PENDING, pf->state);
+		}
+	}
+
+	if (oicr & PFINT_OICR_GRST_M) {
+		u32 reset;
+
+		/* we have a reset warning */
+		ena_mask &= ~PFINT_OICR_GRST_M;
+		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
+			GLGEN_RSTAT_RESET_TYPE_S;
+
+		if (reset == ICE_RESET_CORER)
+			pf->corer_count++;
+		else if (reset == ICE_RESET_GLOBR)
+			pf->globr_count++;
+		else if (reset == ICE_RESET_EMPR)
+			pf->empr_count++;
+		else
+			dev_dbg(dev, "Invalid reset type %d\n", reset);
+
+		/* If a reset cycle isn't already in progress, we set a bit in
+		 * pf->state so that the service task can start a reset/rebuild.
+		 * We also make note of which reset happened so that peer
+		 * devices/drivers can be informed.
+		 */
+		if (!test_and_set_bit(__ICE_RESET_OICR_RECV, pf->state)) {
+			if (reset == ICE_RESET_CORER)
+				set_bit(__ICE_CORER_RECV, pf->state);
+			else if (reset == ICE_RESET_GLOBR)
+				set_bit(__ICE_GLOBR_RECV, pf->state);
+			else
+				set_bit(__ICE_EMPR_RECV, pf->state);
+
+			/* There are couple of different bits at play here.
+			 * hw->reset_ongoing indicates whether the hardware is
+			 * in reset. This is set to true when a reset interrupt
+			 * is received and set back to false after the driver
+			 * has determined that the hardware is out of reset.
+			 *
+			 * __ICE_RESET_OICR_RECV in pf->state indicates
+			 * that a post reset rebuild is required before the
+			 * driver is operational again. This is set above.
+			 *
+			 * As this is the start of the reset/rebuild cycle, set
+			 * both to indicate that.
+			 */
+			hw->reset_ongoing = true;
+		}
+	}
+
+	if (oicr & PFINT_OICR_HMC_ERR_M) {
+		ena_mask &= ~PFINT_OICR_HMC_ERR_M;
+		dev_dbg(dev, "HMC Error interrupt - info 0x%x, data 0x%x\n",
+			rd32(hw, PFHMC_ERRORINFO),
+			rd32(hw, PFHMC_ERRORDATA));
+	}
+
+	/* Report any remaining unexpected interrupts */
+	oicr &= ena_mask;
+	if (oicr) {
+		dev_dbg(dev, "unhandled interrupt oicr=0x%08x\n", oicr);
+		/* If a critical error is pending there is no choice but to
+		 * reset the device.
+		 */
+		if (oicr & (PFINT_OICR_PE_CRITERR_M |
+			    PFINT_OICR_PCI_EXCEPTION_M |
+			    PFINT_OICR_ECC_ERR_M)) {
+			set_bit(__ICE_PFR_REQ, pf->state);
+			ice_service_task_schedule(pf);
+		}
+	}
+	ret = IRQ_HANDLED;
+
+	ice_service_task_schedule(pf);
+	ice_irq_dynamic_ena(hw, NULL, NULL);
+
+	return ret;
+}
+
+/**
+ * ice_dis_ctrlq_interrupts - disable control queue interrupts
+ * @hw: pointer to HW structure
+ */
+static void ice_dis_ctrlq_interrupts(struct ice_hw *hw)
+{
+	/* disable Admin queue Interrupt causes */
+	wr32(hw, PFINT_FW_CTL,
+	     rd32(hw, PFINT_FW_CTL) & ~PFINT_FW_CTL_CAUSE_ENA_M);
+
+	/* disable Mailbox queue Interrupt causes */
+	wr32(hw, PFINT_MBX_CTL,
+	     rd32(hw, PFINT_MBX_CTL) & ~PFINT_MBX_CTL_CAUSE_ENA_M);
+
+	/* disable Control queue Interrupt causes */
+	wr32(hw, PFINT_OICR_CTL,
+	     rd32(hw, PFINT_OICR_CTL) & ~PFINT_OICR_CTL_CAUSE_ENA_M);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_free_irq_msix_misc - Unroll misc vector setup
+ * @pf: board private structure
+ */
+static void ice_free_irq_msix_misc(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	ice_dis_ctrlq_interrupts(hw);
+
+	/* disable OICR interrupt */
+	wr32(hw, PFINT_OICR_ENA, 0);
+	ice_flush(hw);
+
+	if (pf->msix_entries) {
+		synchronize_irq(pf->msix_entries[pf->oicr_idx].vector);
+		devm_free_irq(ice_pf_to_dev(pf),
+			      pf->msix_entries[pf->oicr_idx].vector, pf);
+	}
+
+	pf->num_avail_sw_msix += 1;
+	ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID);
+}
+
+/**
+ * ice_ena_ctrlq_interrupts - enable control queue interrupts
+ * @hw: pointer to HW structure
+ * @reg_idx: HW vector index to associate the control queue interrupts with
+ */
+static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx)
+{
+	u32 val;
+
+	val = ((reg_idx & PFINT_OICR_CTL_MSIX_INDX_M) |
+	       PFINT_OICR_CTL_CAUSE_ENA_M);
+	wr32(hw, PFINT_OICR_CTL, val);
+
+	/* enable Admin queue Interrupt causes */
+	val = ((reg_idx & PFINT_FW_CTL_MSIX_INDX_M) |
+	       PFINT_FW_CTL_CAUSE_ENA_M);
+	wr32(hw, PFINT_FW_CTL, val);
+
+	/* enable Mailbox queue Interrupt causes */
+	val = ((reg_idx & PFINT_MBX_CTL_MSIX_INDX_M) |
+	       PFINT_MBX_CTL_CAUSE_ENA_M);
+	wr32(hw, PFINT_MBX_CTL, val);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_req_irq_msix_misc - Setup the misc vector to handle non queue events
+ * @pf: board private structure
+ *
+ * This sets up the handler for MSIX 0, which is used to manage the
+ * non-queue interrupts, e.g. AdminQ and errors. This is not used
+ * when in MSI or Legacy interrupt mode.
+ */
+static int ice_req_irq_msix_misc(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	int oicr_idx, err = 0;
+
+	if (!pf->int_name[0])
+		snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc",
+			 dev_driver_string(dev), dev_name(dev));
+
+	/* Do not request IRQ but do enable OICR interrupt since settings are
+	 * lost during reset. Note that this function is called only during
+	 * rebuild path and not while reset is in progress.
+	 */
+	if (ice_is_reset_in_progress(pf->state))
+		goto skip_req_irq;
+
+	/* reserve one vector in irq_tracker for misc interrupts */
+	oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
+	if (oicr_idx < 0)
+		return oicr_idx;
+
+	pf->num_avail_sw_msix -= 1;
+	pf->oicr_idx = (u16)oicr_idx;
+
+	err = devm_request_irq(dev, pf->msix_entries[pf->oicr_idx].vector,
+			       ice_misc_intr, 0, pf->int_name, pf);
+	if (err) {
+		dev_err(dev, "devm_request_irq for %s failed: %d\n",
+			pf->int_name, err);
+		ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
+		pf->num_avail_sw_msix += 1;
+		return err;
+	}
+
+skip_req_irq:
+	ice_ena_misc_vector(pf);
+
+	ice_ena_ctrlq_interrupts(hw, pf->oicr_idx);
+	wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx),
+	     ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S);
+
+	ice_flush(hw);
+	ice_irq_dynamic_ena(hw, NULL, NULL);
+
+	return 0;
+}
+
+/**
+ * ice_napi_add - register NAPI handler for the VSI
+ * @vsi: VSI for which NAPI handler is to be registered
+ *
+ * This function is only called in the driver's load path. Registering the NAPI
+ * handler is done in ice_vsi_alloc_q_vector() for all other cases (i.e. resume,
+ * reset/rebuild, etc.)
+ */
+static void ice_napi_add(struct ice_vsi *vsi)
+{
+	int v_idx;
+
+	if (!vsi->netdev)
+		return;
+
+	ice_for_each_q_vector(vsi, v_idx)
+		netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi,
+			       ice_napi_poll, NAPI_POLL_WEIGHT);
+}
+
+/**
+ * ice_set_ops - set netdev and ethtools ops for the given netdev
+ * @netdev: netdev instance
+ */
+static void ice_set_ops(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	if (ice_is_safe_mode(pf)) {
+		netdev->netdev_ops = &ice_netdev_safe_mode_ops;
+		ice_set_ethtool_safe_mode_ops(netdev);
+		return;
+	}
+
+	netdev->netdev_ops = &ice_netdev_ops;
+	netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
+	ice_set_ethtool_ops(netdev);
+}
+
+/**
+ * ice_set_netdev_features - set features for the given netdev
+ * @netdev: netdev instance
+ */
+static void ice_set_netdev_features(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	netdev_features_t csumo_features;
+	netdev_features_t vlano_features;
+	netdev_features_t dflt_features;
+	netdev_features_t tso_features;
+
+	if (ice_is_safe_mode(pf)) {
+		/* safe mode */
+		netdev->features = NETIF_F_SG | NETIF_F_HIGHDMA;
+		netdev->hw_features = netdev->features;
+		return;
+	}
+
+	dflt_features = NETIF_F_SG	|
+			NETIF_F_HIGHDMA	|
+			NETIF_F_NTUPLE	|
+			NETIF_F_RXHASH;
+
+	csumo_features = NETIF_F_RXCSUM	  |
+			 NETIF_F_IP_CSUM  |
+			 NETIF_F_SCTP_CRC |
+			 NETIF_F_IPV6_CSUM;
+
+	vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER |
+			 NETIF_F_HW_VLAN_CTAG_TX     |
+			 NETIF_F_HW_VLAN_CTAG_RX;
+
+	tso_features = NETIF_F_TSO			|
+		       NETIF_F_TSO_ECN			|
+		       NETIF_F_TSO6			|
+		       NETIF_F_GSO_GRE			|
+		       NETIF_F_GSO_UDP_TUNNEL		|
+		       NETIF_F_GSO_GRE_CSUM		|
+		       NETIF_F_GSO_UDP_TUNNEL_CSUM	|
+		       NETIF_F_GSO_PARTIAL		|
+		       NETIF_F_GSO_IPXIP4		|
+		       NETIF_F_GSO_IPXIP6		|
+		       NETIF_F_GSO_UDP_L4;
+
+	netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM |
+					NETIF_F_GSO_GRE_CSUM;
+	/* set features that user can change */
+	netdev->hw_features = dflt_features | csumo_features |
+			      vlano_features | tso_features;
+
+	/* add support for HW_CSUM on packets with MPLS header */
+	netdev->mpls_features =  NETIF_F_HW_CSUM;
+
+	/* enable features */
+	netdev->features |= netdev->hw_features;
+	/* encap and VLAN devices inherit default, csumo and tso features */
+	netdev->hw_enc_features |= dflt_features | csumo_features |
+				   tso_features;
+	netdev->vlan_features |= dflt_features | csumo_features |
+				 tso_features;
+}
+
+/**
+ * ice_cfg_netdev - Allocate, configure and register a netdev
+ * @vsi: the VSI associated with the new netdev
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int ice_cfg_netdev(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_netdev_priv *np;
+	struct net_device *netdev;
+	u8 mac_addr[ETH_ALEN];
+	int err;
+
+	err = ice_devlink_create_port(vsi);
+	if (err)
+		return err;
+
+	netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq,
+				    vsi->alloc_rxq);
+	if (!netdev) {
+		err = -ENOMEM;
+		goto err_destroy_devlink_port;
+	}
+
+	vsi->netdev = netdev;
+	np = netdev_priv(netdev);
+	np->vsi = vsi;
+
+	ice_set_netdev_features(netdev);
+
+	ice_set_ops(netdev);
+
+	if (vsi->type == ICE_VSI_PF) {
+		SET_NETDEV_DEV(netdev, ice_pf_to_dev(pf));
+		ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
+		ether_addr_copy(netdev->dev_addr, mac_addr);
+		ether_addr_copy(netdev->perm_addr, mac_addr);
+	}
+
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+
+	/* Setup netdev TC information */
+	ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc);
+
+	/* setup watchdog timeout value to be 5 second */
+	netdev->watchdog_timeo = 5 * HZ;
+
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = ICE_MAX_MTU;
+
+	err = register_netdev(vsi->netdev);
+	if (err)
+		goto err_free_netdev;
+
+	devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev);
+
+	netif_carrier_off(vsi->netdev);
+
+	/* make sure transmit queues start off as stopped */
+	netif_tx_stop_all_queues(vsi->netdev);
+
+	return 0;
+
+err_free_netdev:
+	free_netdev(vsi->netdev);
+	vsi->netdev = NULL;
+err_destroy_devlink_port:
+	ice_devlink_destroy_port(vsi);
+	return err;
+}
+
+/**
+ * ice_fill_rss_lut - Fill the RSS lookup table with default values
+ * @lut: Lookup table
+ * @rss_table_size: Lookup table size
+ * @rss_size: Range of queue number for hashing
+ */
+void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size)
+{
+	u16 i;
+
+	for (i = 0; i < rss_table_size; i++)
+		lut[i] = i % rss_size;
+}
+
+/**
+ * ice_pf_vsi_setup - Set up a PF VSI
+ * @pf: board private structure
+ * @pi: pointer to the port_info instance
+ *
+ * Returns pointer to the successfully allocated VSI software struct
+ * on success, otherwise returns NULL on failure.
+ */
+static struct ice_vsi *
+ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
+{
+	return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID);
+}
+
+/**
+ * ice_ctrl_vsi_setup - Set up a control VSI
+ * @pf: board private structure
+ * @pi: pointer to the port_info instance
+ *
+ * Returns pointer to the successfully allocated VSI software struct
+ * on success, otherwise returns NULL on failure.
+ */
+static struct ice_vsi *
+ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
+{
+	return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, ICE_INVAL_VFID);
+}
+
+/**
+ * ice_lb_vsi_setup - Set up a loopback VSI
+ * @pf: board private structure
+ * @pi: pointer to the port_info instance
+ *
+ * Returns pointer to the successfully allocated VSI software struct
+ * on success, otherwise returns NULL on failure.
+ */
+struct ice_vsi *
+ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
+{
+	return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID);
+}
+
+/**
+ * ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload
+ * @netdev: network interface to be adjusted
+ * @proto: unused protocol
+ * @vid: VLAN ID to be added
+ *
+ * net_device_ops implementation for adding VLAN IDs
+ */
+static int
+ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto,
+		    u16 vid)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	int ret;
+
+	if (vid >= VLAN_N_VID) {
+		netdev_err(netdev, "VLAN id requested %d is out of range %d\n",
+			   vid, VLAN_N_VID);
+		return -EINVAL;
+	}
+
+	if (vsi->info.pvid)
+		return -EINVAL;
+
+	/* VLAN 0 is added by default during load/reset */
+	if (!vid)
+		return 0;
+
+	/* Enable VLAN pruning when a VLAN other than 0 is added */
+	if (!ice_vsi_is_vlan_pruning_ena(vsi)) {
+		ret = ice_cfg_vlan_pruning(vsi, true, false);
+		if (ret)
+			return ret;
+	}
+
+	/* Add a switch rule for this VLAN ID so its corresponding VLAN tagged
+	 * packets aren't pruned by the device's internal switch on Rx
+	 */
+	ret = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI);
+	if (!ret) {
+		vsi->vlan_ena = true;
+		set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
+	}
+
+	return ret;
+}
+
+/**
+ * ice_vlan_rx_kill_vid - Remove a VLAN ID filter from HW offload
+ * @netdev: network interface to be adjusted
+ * @proto: unused protocol
+ * @vid: VLAN ID to be removed
+ *
+ * net_device_ops implementation for removing VLAN IDs
+ */
+static int
+ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto,
+		     u16 vid)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	int ret;
+
+	if (vsi->info.pvid)
+		return -EINVAL;
+
+	/* don't allow removal of VLAN 0 */
+	if (!vid)
+		return 0;
+
+	/* Make sure ice_vsi_kill_vlan is successful before updating VLAN
+	 * information
+	 */
+	ret = ice_vsi_kill_vlan(vsi, vid);
+	if (ret)
+		return ret;
+
+	/* Disable pruning when VLAN 0 is the only VLAN rule */
+	if (vsi->num_vlan == 1 && ice_vsi_is_vlan_pruning_ena(vsi))
+		ret = ice_cfg_vlan_pruning(vsi, false, false);
+
+	vsi->vlan_ena = false;
+	set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
+	return ret;
+}
+
+/**
+ * ice_setup_pf_sw - Setup the HW switch on startup or after reset
+ * @pf: board private structure
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int ice_setup_pf_sw(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi;
+	int status = 0;
+
+	if (ice_is_reset_in_progress(pf->state))
+		return -EBUSY;
+
+	vsi = ice_pf_vsi_setup(pf, pf->hw.port_info);
+	if (!vsi)
+		return -ENOMEM;
+
+	status = ice_cfg_netdev(vsi);
+	if (status) {
+		status = -ENODEV;
+		goto unroll_vsi_setup;
+	}
+	/* netdev has to be configured before setting frame size */
+	ice_vsi_cfg_frame_size(vsi);
+
+	/* Setup DCB netlink interface */
+	ice_dcbnl_setup(vsi);
+
+	/* registering the NAPI handler requires both the queues and
+	 * netdev to be created, which are done in ice_pf_vsi_setup()
+	 * and ice_cfg_netdev() respectively
+	 */
+	ice_napi_add(vsi);
+
+	status = ice_set_cpu_rx_rmap(vsi);
+	if (status) {
+		dev_err(ice_pf_to_dev(pf), "Failed to set CPU Rx map VSI %d error %d\n",
+			vsi->vsi_num, status);
+		status = -EINVAL;
+		goto unroll_napi_add;
+	}
+	status = ice_init_mac_fltr(pf);
+	if (status)
+		goto free_cpu_rx_map;
+
+	return status;
+
+free_cpu_rx_map:
+	ice_free_cpu_rx_rmap(vsi);
+
+unroll_napi_add:
+	if (vsi) {
+		ice_napi_del(vsi);
+		if (vsi->netdev) {
+			if (vsi->netdev->reg_state == NETREG_REGISTERED)
+				unregister_netdev(vsi->netdev);
+			free_netdev(vsi->netdev);
+			vsi->netdev = NULL;
+		}
+	}
+
+unroll_vsi_setup:
+	ice_vsi_release(vsi);
+	return status;
+}
+
+/**
+ * ice_get_avail_q_count - Get count of queues in use
+ * @pf_qmap: bitmap to get queue use count from
+ * @lock: pointer to a mutex that protects access to pf_qmap
+ * @size: size of the bitmap
+ */
+static u16
+ice_get_avail_q_count(unsigned long *pf_qmap, struct mutex *lock, u16 size)
+{
+	unsigned long bit;
+	u16 count = 0;
+
+	mutex_lock(lock);
+	for_each_clear_bit(bit, pf_qmap, size)
+		count++;
+	mutex_unlock(lock);
+
+	return count;
+}
+
+/**
+ * ice_get_avail_txq_count - Get count of Tx queues in use
+ * @pf: pointer to an ice_pf instance
+ */
+u16 ice_get_avail_txq_count(struct ice_pf *pf)
+{
+	return ice_get_avail_q_count(pf->avail_txqs, &pf->avail_q_mutex,
+				     pf->max_pf_txqs);
+}
+
+/**
+ * ice_get_avail_rxq_count - Get count of Rx queues in use
+ * @pf: pointer to an ice_pf instance
+ */
+u16 ice_get_avail_rxq_count(struct ice_pf *pf)
+{
+	return ice_get_avail_q_count(pf->avail_rxqs, &pf->avail_q_mutex,
+				     pf->max_pf_rxqs);
+}
+
+/**
+ * ice_deinit_pf - Unrolls initialziations done by ice_init_pf
+ * @pf: board private structure to initialize
+ */
+static void ice_deinit_pf(struct ice_pf *pf)
+{
+	ice_service_task_stop(pf);
+	mutex_destroy(&pf->sw_mutex);
+	mutex_destroy(&pf->tc_mutex);
+	mutex_destroy(&pf->avail_q_mutex);
+
+	if (pf->avail_txqs) {
+		bitmap_free(pf->avail_txqs);
+		pf->avail_txqs = NULL;
+	}
+
+	if (pf->avail_rxqs) {
+		bitmap_free(pf->avail_rxqs);
+		pf->avail_rxqs = NULL;
+	}
+}
+
+/**
+ * ice_set_pf_caps - set PFs capability flags
+ * @pf: pointer to the PF instance
+ */
+static void ice_set_pf_caps(struct ice_pf *pf)
+{
+	struct ice_hw_func_caps *func_caps = &pf->hw.func_caps;
+
+	clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+	if (func_caps->common_cap.dcb)
+		set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+	clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
+	if (func_caps->common_cap.sr_iov_1_1) {
+		set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
+		pf->num_vfs_supported = min_t(int, func_caps->num_allocd_vfs,
+					      ICE_MAX_VF_COUNT);
+	}
+	clear_bit(ICE_FLAG_RSS_ENA, pf->flags);
+	if (func_caps->common_cap.rss_table_size)
+		set_bit(ICE_FLAG_RSS_ENA, pf->flags);
+
+	clear_bit(ICE_FLAG_FD_ENA, pf->flags);
+	if (func_caps->fd_fltr_guar > 0 || func_caps->fd_fltr_best_effort > 0) {
+		u16 unused;
+
+		/* ctrl_vsi_idx will be set to a valid value when flow director
+		 * is setup by ice_init_fdir
+		 */
+		pf->ctrl_vsi_idx = ICE_NO_VSI;
+		set_bit(ICE_FLAG_FD_ENA, pf->flags);
+		/* force guaranteed filter pool for PF */
+		ice_alloc_fd_guar_item(&pf->hw, &unused,
+				       func_caps->fd_fltr_guar);
+		/* force shared filter pool for PF */
+		ice_alloc_fd_shrd_item(&pf->hw, &unused,
+				       func_caps->fd_fltr_best_effort);
+	}
+
+	pf->max_pf_txqs = func_caps->common_cap.num_txq;
+	pf->max_pf_rxqs = func_caps->common_cap.num_rxq;
+}
+
+/**
+ * ice_init_pf - Initialize general software structures (struct ice_pf)
+ * @pf: board private structure to initialize
+ */
+static int ice_init_pf(struct ice_pf *pf)
+{
+	ice_set_pf_caps(pf);
+
+	mutex_init(&pf->sw_mutex);
+	mutex_init(&pf->tc_mutex);
+
+	INIT_HLIST_HEAD(&pf->aq_wait_list);
+	spin_lock_init(&pf->aq_wait_lock);
+	init_waitqueue_head(&pf->aq_wait_queue);
+
+	/* setup service timer and periodic service task */
+	timer_setup(&pf->serv_tmr, ice_service_timer, 0);
+	pf->serv_tmr_period = HZ;
+	INIT_WORK(&pf->serv_task, ice_service_task);
+	clear_bit(__ICE_SERVICE_SCHED, pf->state);
+
+	mutex_init(&pf->avail_q_mutex);
+	pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL);
+	if (!pf->avail_txqs)
+		return -ENOMEM;
+
+	pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL);
+	if (!pf->avail_rxqs) {
+		bitmap_free(pf->avail_txqs);
+		pf->avail_txqs = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ena_msix_range - Request a range of MSIX vectors from the OS
+ * @pf: board private structure
+ *
+ * compute the number of MSIX vectors required (v_budget) and request from
+ * the OS. Return the number of vectors reserved or negative on failure
+ */
+static int ice_ena_msix_range(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	int v_left, v_actual, v_budget = 0;
+	int needed, err, i;
+
+	v_left = pf->hw.func_caps.common_cap.num_msix_vectors;
+
+	/* reserve one vector for miscellaneous handler */
+	needed = 1;
+	if (v_left < needed)
+		goto no_hw_vecs_left_err;
+	v_budget += needed;
+	v_left -= needed;
+
+	/* reserve vectors for LAN traffic */
+	needed = min_t(int, num_online_cpus(), v_left);
+	if (v_left < needed)
+		goto no_hw_vecs_left_err;
+	pf->num_lan_msix = needed;
+	v_budget += needed;
+	v_left -= needed;
+
+	/* reserve one vector for flow director */
+	if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
+		needed = ICE_FDIR_MSIX;
+		if (v_left < needed)
+			goto no_hw_vecs_left_err;
+		v_budget += needed;
+		v_left -= needed;
+	}
+
+	pf->msix_entries = devm_kcalloc(dev, v_budget,
+					sizeof(*pf->msix_entries), GFP_KERNEL);
+
+	if (!pf->msix_entries) {
+		err = -ENOMEM;
+		goto exit_err;
+	}
+
+	for (i = 0; i < v_budget; i++)
+		pf->msix_entries[i].entry = i;
+
+	/* actually reserve the vectors */
+	v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries,
+					 ICE_MIN_MSIX, v_budget);
+
+	if (v_actual < 0) {
+		dev_err(dev, "unable to reserve MSI-X vectors\n");
+		err = v_actual;
+		goto msix_err;
+	}
+
+	if (v_actual < v_budget) {
+		dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n",
+			 v_budget, v_actual);
+
+		if (v_actual < ICE_MIN_MSIX) {
+			/* error if we can't get minimum vectors */
+			pci_disable_msix(pf->pdev);
+			err = -ERANGE;
+			goto msix_err;
+		} else {
+			pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX;
+		}
+	}
+
+	return v_actual;
+
+msix_err:
+	devm_kfree(dev, pf->msix_entries);
+	goto exit_err;
+
+no_hw_vecs_left_err:
+	dev_err(dev, "not enough device MSI-X vectors. requested = %d, available = %d\n",
+		needed, v_left);
+	err = -ERANGE;
+exit_err:
+	pf->num_lan_msix = 0;
+	return err;
+}
+
+/**
+ * ice_dis_msix - Disable MSI-X interrupt setup in OS
+ * @pf: board private structure
+ */
+static void ice_dis_msix(struct ice_pf *pf)
+{
+	pci_disable_msix(pf->pdev);
+	devm_kfree(ice_pf_to_dev(pf), pf->msix_entries);
+	pf->msix_entries = NULL;
+}
+
+/**
+ * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme
+ * @pf: board private structure
+ */
+static void ice_clear_interrupt_scheme(struct ice_pf *pf)
+{
+	ice_dis_msix(pf);
+
+	if (pf->irq_tracker) {
+		devm_kfree(ice_pf_to_dev(pf), pf->irq_tracker);
+		pf->irq_tracker = NULL;
+	}
+}
+
+/**
+ * ice_init_interrupt_scheme - Determine proper interrupt scheme
+ * @pf: board private structure to initialize
+ */
+static int ice_init_interrupt_scheme(struct ice_pf *pf)
+{
+	int vectors;
+
+	vectors = ice_ena_msix_range(pf);
+
+	if (vectors < 0)
+		return vectors;
+
+	/* set up vector assignment tracking */
+	pf->irq_tracker =
+		devm_kzalloc(ice_pf_to_dev(pf), sizeof(*pf->irq_tracker) +
+			     (sizeof(u16) * vectors), GFP_KERNEL);
+	if (!pf->irq_tracker) {
+		ice_dis_msix(pf);
+		return -ENOMEM;
+	}
+
+	/* populate SW interrupts pool with number of OS granted IRQs. */
+	pf->num_avail_sw_msix = (u16)vectors;
+	pf->irq_tracker->num_entries = (u16)vectors;
+	pf->irq_tracker->end = pf->irq_tracker->num_entries;
+
+	return 0;
+}
+
+/**
+ * ice_is_wol_supported - check if WoL is supported
+ * @hw: pointer to hardware info
+ *
+ * Check if WoL is supported based on the HW configuration.
+ * Returns true if NVM supports and enables WoL for this port, false otherwise
+ */
+bool ice_is_wol_supported(struct ice_hw *hw)
+{
+	u16 wol_ctrl;
+
+	/* A bit set to 1 in the NVM Software Reserved Word 2 (WoL control
+	 * word) indicates WoL is not supported on the corresponding PF ID.
+	 */
+	if (ice_read_sr_word(hw, ICE_SR_NVM_WOL_CFG, &wol_ctrl))
+		return false;
+
+	return !(BIT(hw->port_info->lport) & wol_ctrl);
+}
+
+/**
+ * ice_vsi_recfg_qs - Change the number of queues on a VSI
+ * @vsi: VSI being changed
+ * @new_rx: new number of Rx queues
+ * @new_tx: new number of Tx queues
+ *
+ * Only change the number of queues if new_tx, or new_rx is non-0.
+ *
+ * Returns 0 on success.
+ */
+int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx)
+{
+	struct ice_pf *pf = vsi->back;
+	int err = 0, timeout = 50;
+
+	if (!new_rx && !new_tx)
+		return -EINVAL;
+
+	while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+		usleep_range(1000, 2000);
+	}
+
+	if (new_tx)
+		vsi->req_txq = (u16)new_tx;
+	if (new_rx)
+		vsi->req_rxq = (u16)new_rx;
+
+	/* set for the next time the netdev is started */
+	if (!netif_running(vsi->netdev)) {
+		ice_vsi_rebuild(vsi, false);
+		dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n");
+		goto done;
+	}
+
+	ice_vsi_close(vsi);
+	ice_vsi_rebuild(vsi, false);
+	ice_pf_dcb_recfg(pf);
+	ice_vsi_open(vsi);
+done:
+	clear_bit(__ICE_CFG_BUSY, pf->state);
+	return err;
+}
+
+/**
+ * ice_set_safe_mode_vlan_cfg - configure PF VSI to allow all VLANs in safe mode
+ * @pf: PF to configure
+ *
+ * No VLAN offloads/filtering are advertised in safe mode so make sure the PF
+ * VSI can still Tx/Rx VLAN tagged packets.
+ */
+static void ice_set_safe_mode_vlan_cfg(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi = ice_get_main_vsi(pf);
+	struct ice_vsi_ctx *ctxt;
+	enum ice_status status;
+	struct ice_hw *hw;
+
+	if (!vsi)
+		return;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return;
+
+	hw = &pf->hw;
+	ctxt->info = vsi->info;
+
+	ctxt->info.valid_sections =
+		cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
+			    ICE_AQ_VSI_PROP_SECURITY_VALID |
+			    ICE_AQ_VSI_PROP_SW_VALID);
+
+	/* disable VLAN anti-spoof */
+	ctxt->info.sec_flags &= ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+				  ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+
+	/* disable VLAN pruning and keep all other settings */
+	ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+
+	/* allow all VLANs on Tx and don't strip on Rx */
+	ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL |
+		ICE_AQ_VSI_VLAN_EMOD_NOTHING;
+
+	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (status) {
+		dev_err(ice_pf_to_dev(vsi->back), "Failed to update VSI for safe mode VLANs, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+	} else {
+		vsi->info.sec_flags = ctxt->info.sec_flags;
+		vsi->info.sw_flags2 = ctxt->info.sw_flags2;
+		vsi->info.vlan_flags = ctxt->info.vlan_flags;
+	}
+
+	kfree(ctxt);
+}
+
+/**
+ * ice_log_pkg_init - log result of DDP package load
+ * @hw: pointer to hardware info
+ * @status: status of package load
+ */
+static void
+ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status)
+{
+	struct ice_pf *pf = (struct ice_pf *)hw->back;
+	struct device *dev = ice_pf_to_dev(pf);
+
+	switch (*status) {
+	case ICE_SUCCESS:
+		/* The package download AdminQ command returned success because
+		 * this download succeeded or ICE_ERR_AQ_NO_WORK since there is
+		 * already a package loaded on the device.
+		 */
+		if (hw->pkg_ver.major == hw->active_pkg_ver.major &&
+		    hw->pkg_ver.minor == hw->active_pkg_ver.minor &&
+		    hw->pkg_ver.update == hw->active_pkg_ver.update &&
+		    hw->pkg_ver.draft == hw->active_pkg_ver.draft &&
+		    !memcmp(hw->pkg_name, hw->active_pkg_name,
+			    sizeof(hw->pkg_name))) {
+			if (hw->pkg_dwnld_status == ICE_AQ_RC_EEXIST)
+				dev_info(dev, "DDP package already present on device: %s version %d.%d.%d.%d\n",
+					 hw->active_pkg_name,
+					 hw->active_pkg_ver.major,
+					 hw->active_pkg_ver.minor,
+					 hw->active_pkg_ver.update,
+					 hw->active_pkg_ver.draft);
+			else
+				dev_info(dev, "The DDP package was successfully loaded: %s version %d.%d.%d.%d\n",
+					 hw->active_pkg_name,
+					 hw->active_pkg_ver.major,
+					 hw->active_pkg_ver.minor,
+					 hw->active_pkg_ver.update,
+					 hw->active_pkg_ver.draft);
+		} else if (hw->active_pkg_ver.major != ICE_PKG_SUPP_VER_MAJ ||
+			   hw->active_pkg_ver.minor != ICE_PKG_SUPP_VER_MNR) {
+			dev_err(dev, "The device has a DDP package that is not supported by the driver.  The device has package '%s' version %d.%d.x.x.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
+				hw->active_pkg_name,
+				hw->active_pkg_ver.major,
+				hw->active_pkg_ver.minor,
+				ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
+			*status = ICE_ERR_NOT_SUPPORTED;
+		} else if (hw->active_pkg_ver.major == ICE_PKG_SUPP_VER_MAJ &&
+			   hw->active_pkg_ver.minor == ICE_PKG_SUPP_VER_MNR) {
+			dev_info(dev, "The driver could not load the DDP package file because a compatible DDP package is already present on the device.  The device has package '%s' version %d.%d.%d.%d.  The package file found by the driver: '%s' version %d.%d.%d.%d.\n",
+				 hw->active_pkg_name,
+				 hw->active_pkg_ver.major,
+				 hw->active_pkg_ver.minor,
+				 hw->active_pkg_ver.update,
+				 hw->active_pkg_ver.draft,
+				 hw->pkg_name,
+				 hw->pkg_ver.major,
+				 hw->pkg_ver.minor,
+				 hw->pkg_ver.update,
+				 hw->pkg_ver.draft);
+		} else {
+			dev_err(dev, "An unknown error occurred when loading the DDP package, please reboot the system.  If the problem persists, update the NVM.  Entering Safe Mode.\n");
+			*status = ICE_ERR_NOT_SUPPORTED;
+		}
+		break;
+	case ICE_ERR_FW_DDP_MISMATCH:
+		dev_err(dev, "The firmware loaded on the device is not compatible with the DDP package.  Please update the device's NVM.  Entering safe mode.\n");
+		break;
+	case ICE_ERR_BUF_TOO_SHORT:
+	case ICE_ERR_CFG:
+		dev_err(dev, "The DDP package file is invalid. Entering Safe Mode.\n");
+		break;
+	case ICE_ERR_NOT_SUPPORTED:
+		/* Package File version not supported */
+		if (hw->pkg_ver.major > ICE_PKG_SUPP_VER_MAJ ||
+		    (hw->pkg_ver.major == ICE_PKG_SUPP_VER_MAJ &&
+		     hw->pkg_ver.minor > ICE_PKG_SUPP_VER_MNR))
+			dev_err(dev, "The DDP package file version is higher than the driver supports.  Please use an updated driver.  Entering Safe Mode.\n");
+		else if (hw->pkg_ver.major < ICE_PKG_SUPP_VER_MAJ ||
+			 (hw->pkg_ver.major == ICE_PKG_SUPP_VER_MAJ &&
+			  hw->pkg_ver.minor < ICE_PKG_SUPP_VER_MNR))
+			dev_err(dev, "The DDP package file version is lower than the driver supports.  The driver requires version %d.%d.x.x.  Please use an updated DDP Package file.  Entering Safe Mode.\n",
+				ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
+		break;
+	case ICE_ERR_AQ_ERROR:
+		switch (hw->pkg_dwnld_status) {
+		case ICE_AQ_RC_ENOSEC:
+		case ICE_AQ_RC_EBADSIG:
+			dev_err(dev, "The DDP package could not be loaded because its signature is not valid.  Please use a valid DDP Package.  Entering Safe Mode.\n");
+			return;
+		case ICE_AQ_RC_ESVN:
+			dev_err(dev, "The DDP Package could not be loaded because its security revision is too low.  Please use an updated DDP Package.  Entering Safe Mode.\n");
+			return;
+		case ICE_AQ_RC_EBADMAN:
+		case ICE_AQ_RC_EBADBUF:
+			dev_err(dev, "An error occurred on the device while loading the DDP package.  The device will be reset.\n");
+			/* poll for reset to complete */
+			if (ice_check_reset(hw))
+				dev_err(dev, "Error resetting device. Please reload the driver\n");
+			return;
+		default:
+			break;
+		}
+		fallthrough;
+	default:
+		dev_err(dev, "An unknown error (%d) occurred when loading the DDP package.  Entering Safe Mode.\n",
+			*status);
+		break;
+	}
+}
+
+/**
+ * ice_load_pkg - load/reload the DDP Package file
+ * @firmware: firmware structure when firmware requested or NULL for reload
+ * @pf: pointer to the PF instance
+ *
+ * Called on probe and post CORER/GLOBR rebuild to load DDP Package and
+ * initialize HW tables.
+ */
+static void
+ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf)
+{
+	enum ice_status status = ICE_ERR_PARAM;
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+
+	/* Load DDP Package */
+	if (firmware && !hw->pkg_copy) {
+		status = ice_copy_and_init_pkg(hw, firmware->data,
+					       firmware->size);
+		ice_log_pkg_init(hw, &status);
+	} else if (!firmware && hw->pkg_copy) {
+		/* Reload package during rebuild after CORER/GLOBR reset */
+		status = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
+		ice_log_pkg_init(hw, &status);
+	} else {
+		dev_err(dev, "The DDP package file failed to load. Entering Safe Mode.\n");
+	}
+
+	if (status) {
+		/* Safe Mode */
+		clear_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
+		return;
+	}
+
+	/* Successful download package is the precondition for advanced
+	 * features, hence setting the ICE_FLAG_ADV_FEATURES flag
+	 */
+	set_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
+}
+
+/**
+ * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines
+ * @pf: pointer to the PF structure
+ *
+ * There is no error returned here because the driver should be able to handle
+ * 128 Byte cache lines, so we only print a warning in case issues are seen,
+ * specifically with Tx.
+ */
+static void ice_verify_cacheline_size(struct ice_pf *pf)
+{
+	if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M)
+		dev_warn(ice_pf_to_dev(pf), "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n",
+			 ICE_CACHE_LINE_BYTES);
+}
+
+/**
+ * ice_send_version - update firmware with driver version
+ * @pf: PF struct
+ *
+ * Returns ICE_SUCCESS on success, else error code
+ */
+static enum ice_status ice_send_version(struct ice_pf *pf)
+{
+	struct ice_driver_ver dv;
+
+	dv.major_ver = 0xff;
+	dv.minor_ver = 0xff;
+	dv.build_ver = 0xff;
+	dv.subbuild_ver = 0;
+	strscpy((char *)dv.driver_string, UTS_RELEASE,
+		sizeof(dv.driver_string));
+	return ice_aq_send_driver_ver(&pf->hw, &dv, NULL);
+}
+
+/**
+ * ice_init_fdir - Initialize flow director VSI and configuration
+ * @pf: pointer to the PF instance
+ *
+ * returns 0 on success, negative on error
+ */
+static int ice_init_fdir(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_vsi *ctrl_vsi;
+	int err;
+
+	/* Side Band Flow Director needs to have a control VSI.
+	 * Allocate it and store it in the PF.
+	 */
+	ctrl_vsi = ice_ctrl_vsi_setup(pf, pf->hw.port_info);
+	if (!ctrl_vsi) {
+		dev_dbg(dev, "could not create control VSI\n");
+		return -ENOMEM;
+	}
+
+	err = ice_vsi_open_ctrl(ctrl_vsi);
+	if (err) {
+		dev_dbg(dev, "could not open control VSI\n");
+		goto err_vsi_open;
+	}
+
+	mutex_init(&pf->hw.fdir_fltr_lock);
+
+	err = ice_fdir_create_dflt_rules(pf);
+	if (err)
+		goto err_fdir_rule;
+
+	return 0;
+
+err_fdir_rule:
+	ice_fdir_release_flows(&pf->hw);
+	ice_vsi_close(ctrl_vsi);
+err_vsi_open:
+	ice_vsi_release(ctrl_vsi);
+	if (pf->ctrl_vsi_idx != ICE_NO_VSI) {
+		pf->vsi[pf->ctrl_vsi_idx] = NULL;
+		pf->ctrl_vsi_idx = ICE_NO_VSI;
+	}
+	return err;
+}
+
+/**
+ * ice_get_opt_fw_name - return optional firmware file name or NULL
+ * @pf: pointer to the PF instance
+ */
+static char *ice_get_opt_fw_name(struct ice_pf *pf)
+{
+	/* Optional firmware name same as default with additional dash
+	 * followed by a EUI-64 identifier (PCIe Device Serial Number)
+	 */
+	struct pci_dev *pdev = pf->pdev;
+	char *opt_fw_filename;
+	u64 dsn;
+
+	/* Determine the name of the optional file using the DSN (two
+	 * dwords following the start of the DSN Capability).
+	 */
+	dsn = pci_get_dsn(pdev);
+	if (!dsn)
+		return NULL;
+
+	opt_fw_filename = kzalloc(NAME_MAX, GFP_KERNEL);
+	if (!opt_fw_filename)
+		return NULL;
+
+	snprintf(opt_fw_filename, NAME_MAX, "%sice-%016llx.pkg",
+		 ICE_DDP_PKG_PATH, dsn);
+
+	return opt_fw_filename;
+}
+
+/**
+ * ice_request_fw - Device initialization routine
+ * @pf: pointer to the PF instance
+ */
+static void ice_request_fw(struct ice_pf *pf)
+{
+	char *opt_fw_filename = ice_get_opt_fw_name(pf);
+	const struct firmware *firmware = NULL;
+	struct device *dev = ice_pf_to_dev(pf);
+	int err = 0;
+
+	/* optional device-specific DDP (if present) overrides the default DDP
+	 * package file. kernel logs a debug message if the file doesn't exist,
+	 * and warning messages for other errors.
+	 */
+	if (opt_fw_filename) {
+		err = firmware_request_nowarn(&firmware, opt_fw_filename, dev);
+		if (err) {
+			kfree(opt_fw_filename);
+			goto dflt_pkg_load;
+		}
+
+		/* request for firmware was successful. Download to device */
+		ice_load_pkg(firmware, pf);
+		kfree(opt_fw_filename);
+		release_firmware(firmware);
+		return;
+	}
+
+dflt_pkg_load:
+	err = request_firmware(&firmware, ICE_DDP_PKG_FILE, dev);
+	if (err) {
+		dev_err(dev, "The DDP package file was not found or could not be read. Entering Safe Mode\n");
+		return;
+	}
+
+	/* request for firmware was successful. Download to device */
+	ice_load_pkg(firmware, pf);
+	release_firmware(firmware);
+}
+
+/**
+ * ice_print_wake_reason - show the wake up cause in the log
+ * @pf: pointer to the PF struct
+ */
+static void ice_print_wake_reason(struct ice_pf *pf)
+{
+	u32 wus = pf->wakeup_reason;
+	const char *wake_str;
+
+	/* if no wake event, nothing to print */
+	if (!wus)
+		return;
+
+	if (wus & PFPM_WUS_LNKC_M)
+		wake_str = "Link\n";
+	else if (wus & PFPM_WUS_MAG_M)
+		wake_str = "Magic Packet\n";
+	else if (wus & PFPM_WUS_MNG_M)
+		wake_str = "Management\n";
+	else if (wus & PFPM_WUS_FW_RST_WK_M)
+		wake_str = "Firmware Reset\n";
+	else
+		wake_str = "Unknown\n";
+
+	dev_info(ice_pf_to_dev(pf), "Wake reason: %s", wake_str);
+}
+
+/**
+ * ice_probe - Device initialization routine
+ * @pdev: PCI device information struct
+ * @ent: entry in ice_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int
+ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
+{
+	struct device *dev = &pdev->dev;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	int i, err;
+
+	if (pdev->is_virtfn) {
+		dev_err(dev, "can't probe a virtual function\n");
+		return -EINVAL;
+	}
+
+	/* when under a kdump kernel initiate a reset before enabling the
+	 * device in order to clear out any pending DMA transactions. These
+	 * transactions can cause some systems to machine check when doing
+	 * the pcim_enable_device() below.
+	 */
+	if (is_kdump_kernel()) {
+		pci_save_state(pdev);
+		pci_clear_master(pdev);
+		err = pcie_flr(pdev);
+		if (err)
+			return err;
+		pci_restore_state(pdev);
+	}
+
+	/* this driver uses devres, see
+	 * Documentation/driver-api/driver-model/devres.rst
+	 */
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), pci_name(pdev));
+	if (err) {
+		dev_err(dev, "BAR0 I/O map error %d\n", err);
+		return err;
+	}
+
+	pf = ice_allocate_pf(dev);
+	if (!pf)
+		return -ENOMEM;
+
+	/* set up for high or low DMA */
+	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	if (err)
+		err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+	if (err) {
+		dev_err(dev, "DMA configuration failed: 0x%x\n", err);
+		return err;
+	}
+
+	pci_enable_pcie_error_reporting(pdev);
+	pci_set_master(pdev);
+
+	pf->pdev = pdev;
+	pci_set_drvdata(pdev, pf);
+	set_bit(__ICE_DOWN, pf->state);
+	/* Disable service task until DOWN bit is cleared */
+	set_bit(__ICE_SERVICE_DIS, pf->state);
+
+	hw = &pf->hw;
+	hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
+	pci_save_state(pdev);
+
+	hw->back = pf;
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_device_id = pdev->subsystem_device;
+	hw->bus.device = PCI_SLOT(pdev->devfn);
+	hw->bus.func = PCI_FUNC(pdev->devfn);
+	ice_set_ctrlq_len(hw);
+
+	pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M);
+
+	err = ice_devlink_register(pf);
+	if (err) {
+		dev_err(dev, "ice_devlink_register failed: %d\n", err);
+		goto err_exit_unroll;
+	}
+
+#ifndef CONFIG_DYNAMIC_DEBUG
+	if (debug < -1)
+		hw->debug_mask = debug;
+#endif
+
+	err = ice_init_hw(hw);
+	if (err) {
+		dev_err(dev, "ice_init_hw failed: %d\n", err);
+		err = -EIO;
+		goto err_exit_unroll;
+	}
+
+	ice_request_fw(pf);
+
+	/* if ice_request_fw fails, ICE_FLAG_ADV_FEATURES bit won't be
+	 * set in pf->state, which will cause ice_is_safe_mode to return
+	 * true
+	 */
+	if (ice_is_safe_mode(pf)) {
+		dev_err(dev, "Package download failed. Advanced features disabled - Device now in Safe Mode\n");
+		/* we already got function/device capabilities but these don't
+		 * reflect what the driver needs to do in safe mode. Instead of
+		 * adding conditional logic everywhere to ignore these
+		 * device/function capabilities, override them.
+		 */
+		ice_set_safe_mode_caps(hw);
+	}
+
+	err = ice_init_pf(pf);
+	if (err) {
+		dev_err(dev, "ice_init_pf failed: %d\n", err);
+		goto err_init_pf_unroll;
+	}
+
+	ice_devlink_init_regions(pf);
+
+	pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port;
+	pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port;
+	pf->hw.udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
+	pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared;
+	i = 0;
+	if (pf->hw.tnl.valid_count[TNL_VXLAN]) {
+		pf->hw.udp_tunnel_nic.tables[i].n_entries =
+			pf->hw.tnl.valid_count[TNL_VXLAN];
+		pf->hw.udp_tunnel_nic.tables[i].tunnel_types =
+			UDP_TUNNEL_TYPE_VXLAN;
+		i++;
+	}
+	if (pf->hw.tnl.valid_count[TNL_GENEVE]) {
+		pf->hw.udp_tunnel_nic.tables[i].n_entries =
+			pf->hw.tnl.valid_count[TNL_GENEVE];
+		pf->hw.udp_tunnel_nic.tables[i].tunnel_types =
+			UDP_TUNNEL_TYPE_GENEVE;
+		i++;
+	}
+
+	pf->num_alloc_vsi = hw->func_caps.guar_num_vsi;
+	if (!pf->num_alloc_vsi) {
+		err = -EIO;
+		goto err_init_pf_unroll;
+	}
+	if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) {
+		dev_warn(&pf->pdev->dev,
+			 "limiting the VSI count due to UDP tunnel limitation %d > %d\n",
+			 pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES);
+		pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES;
+	}
+
+	pf->vsi = devm_kcalloc(dev, pf->num_alloc_vsi, sizeof(*pf->vsi),
+			       GFP_KERNEL);
+	if (!pf->vsi) {
+		err = -ENOMEM;
+		goto err_init_pf_unroll;
+	}
+
+	err = ice_init_interrupt_scheme(pf);
+	if (err) {
+		dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err);
+		err = -EIO;
+		goto err_init_vsi_unroll;
+	}
+
+	/* In case of MSIX we are going to setup the misc vector right here
+	 * to handle admin queue events etc. In case of legacy and MSI
+	 * the misc functionality and queue processing is combined in
+	 * the same vector and that gets setup at open.
+	 */
+	err = ice_req_irq_msix_misc(pf);
+	if (err) {
+		dev_err(dev, "setup of misc vector failed: %d\n", err);
+		goto err_init_interrupt_unroll;
+	}
+
+	/* create switch struct for the switch element created by FW on boot */
+	pf->first_sw = devm_kzalloc(dev, sizeof(*pf->first_sw), GFP_KERNEL);
+	if (!pf->first_sw) {
+		err = -ENOMEM;
+		goto err_msix_misc_unroll;
+	}
+
+	if (hw->evb_veb)
+		pf->first_sw->bridge_mode = BRIDGE_MODE_VEB;
+	else
+		pf->first_sw->bridge_mode = BRIDGE_MODE_VEPA;
+
+	pf->first_sw->pf = pf;
+
+	/* record the sw_id available for later use */
+	pf->first_sw->sw_id = hw->port_info->sw_id;
+
+	err = ice_setup_pf_sw(pf);
+	if (err) {
+		dev_err(dev, "probe failed due to setup PF switch: %d\n", err);
+		goto err_alloc_sw_unroll;
+	}
+
+	clear_bit(__ICE_SERVICE_DIS, pf->state);
+
+	/* tell the firmware we are up */
+	err = ice_send_version(pf);
+	if (err) {
+		dev_err(dev, "probe failed sending driver version %s. error: %d\n",
+			UTS_RELEASE, err);
+		goto err_send_version_unroll;
+	}
+
+	/* since everything is good, start the service timer */
+	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
+
+	err = ice_init_link_events(pf->hw.port_info);
+	if (err) {
+		dev_err(dev, "ice_init_link_events failed: %d\n", err);
+		goto err_send_version_unroll;
+	}
+
+	/* not a fatal error if this fails */
+	err = ice_init_nvm_phy_type(pf->hw.port_info);
+	if (err)
+		dev_err(dev, "ice_init_nvm_phy_type failed: %d\n", err);
+
+	/* not a fatal error if this fails */
+	err = ice_update_link_info(pf->hw.port_info);
+	if (err)
+		dev_err(dev, "ice_update_link_info failed: %d\n", err);
+
+	ice_init_link_dflt_override(pf->hw.port_info);
+
+	/* if media available, initialize PHY settings */
+	if (pf->hw.port_info->phy.link_info.link_info &
+	    ICE_AQ_MEDIA_AVAILABLE) {
+		/* not a fatal error if this fails */
+		err = ice_init_phy_user_cfg(pf->hw.port_info);
+		if (err)
+			dev_err(dev, "ice_init_phy_user_cfg failed: %d\n", err);
+
+		if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) {
+			struct ice_vsi *vsi = ice_get_main_vsi(pf);
+
+			if (vsi)
+				ice_configure_phy(vsi);
+		}
+	} else {
+		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+	}
+
+	ice_verify_cacheline_size(pf);
+
+	/* Save wakeup reason register for later use */
+	pf->wakeup_reason = rd32(hw, PFPM_WUS);
+
+	/* check for a power management event */
+	ice_print_wake_reason(pf);
+
+	/* clear wake status, all bits */
+	wr32(hw, PFPM_WUS, U32_MAX);
+
+	/* Disable WoL at init, wait for user to enable */
+	device_set_wakeup_enable(dev, false);
+
+	if (ice_is_safe_mode(pf)) {
+		ice_set_safe_mode_vlan_cfg(pf);
+		goto probe_done;
+	}
+
+	/* initialize DDP driven features */
+
+	/* Note: Flow director init failure is non-fatal to load */
+	if (ice_init_fdir(pf))
+		dev_err(dev, "could not initialize flow director\n");
+
+	/* Note: DCB init failure is non-fatal to load */
+	if (ice_init_pf_dcb(pf, false)) {
+		clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	} else {
+		ice_cfg_lldp_mib_change(&pf->hw, true);
+	}
+
+	/* print PCI link speed and width */
+	pcie_print_link_status(pf->pdev);
+
+probe_done:
+	/* ready to go, so clear down state bit */
+	clear_bit(__ICE_DOWN, pf->state);
+	return 0;
+
+err_send_version_unroll:
+	ice_vsi_release_all(pf);
+err_alloc_sw_unroll:
+	set_bit(__ICE_SERVICE_DIS, pf->state);
+	set_bit(__ICE_DOWN, pf->state);
+	devm_kfree(dev, pf->first_sw);
+err_msix_misc_unroll:
+	ice_free_irq_msix_misc(pf);
+err_init_interrupt_unroll:
+	ice_clear_interrupt_scheme(pf);
+err_init_vsi_unroll:
+	devm_kfree(dev, pf->vsi);
+err_init_pf_unroll:
+	ice_deinit_pf(pf);
+	ice_devlink_destroy_regions(pf);
+	ice_deinit_hw(hw);
+err_exit_unroll:
+	ice_devlink_unregister(pf);
+	pci_disable_pcie_error_reporting(pdev);
+	pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * ice_set_wake - enable or disable Wake on LAN
+ * @pf: pointer to the PF struct
+ *
+ * Simple helper for WoL control
+ */
+static void ice_set_wake(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	bool wol = pf->wol_ena;
+
+	/* clear wake state, otherwise new wake events won't fire */
+	wr32(hw, PFPM_WUS, U32_MAX);
+
+	/* enable / disable APM wake up, no RMW needed */
+	wr32(hw, PFPM_APM, wol ? PFPM_APM_APME_M : 0);
+
+	/* set magic packet filter enabled */
+	wr32(hw, PFPM_WUFC, wol ? PFPM_WUFC_MAG_M : 0);
+}
+
+/**
+ * ice_setup_magic_mc_wake - setup device to wake on multicast magic packet
+ * @pf: pointer to the PF struct
+ *
+ * Issue firmware command to enable multicast magic wake, making
+ * sure that any locally administered address (LAA) is used for
+ * wake, and that PF reset doesn't undo the LAA.
+ */
+static void ice_setup_mc_magic_wake(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	u8 mac_addr[ETH_ALEN];
+	struct ice_vsi *vsi;
+	u8 flags;
+
+	if (!pf->wol_ena)
+		return;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return;
+
+	/* Get current MAC address in case it's an LAA */
+	if (vsi->netdev)
+		ether_addr_copy(mac_addr, vsi->netdev->dev_addr);
+	else
+		ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
+
+	flags = ICE_AQC_MAN_MAC_WR_MC_MAG_EN |
+		ICE_AQC_MAN_MAC_UPDATE_LAA_WOL |
+		ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP;
+
+	status = ice_aq_manage_mac_write(hw, mac_addr, flags, NULL);
+	if (status)
+		dev_err(dev, "Failed to enable Multicast Magic Packet wake, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+}
+
+/**
+ * ice_remove - Device removal routine
+ * @pdev: PCI device information struct
+ */
+static void ice_remove(struct pci_dev *pdev)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < ICE_MAX_RESET_WAIT; i++) {
+		if (!ice_is_reset_in_progress(pf->state))
+			break;
+		msleep(100);
+	}
+
+	if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) {
+		set_bit(__ICE_VF_RESETS_DISABLED, pf->state);
+		ice_free_vfs(pf);
+	}
+
+	set_bit(__ICE_DOWN, pf->state);
+	ice_service_task_stop(pf);
+
+	ice_aq_cancel_waiting_tasks(pf);
+
+	mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
+	if (!ice_is_safe_mode(pf))
+		ice_remove_arfs(pf);
+	ice_setup_mc_magic_wake(pf);
+	ice_vsi_release_all(pf);
+	ice_set_wake(pf);
+	ice_free_irq_msix_misc(pf);
+	ice_for_each_vsi(pf, i) {
+		if (!pf->vsi[i])
+			continue;
+		ice_vsi_free_q_vectors(pf->vsi[i]);
+	}
+	ice_deinit_pf(pf);
+	ice_devlink_destroy_regions(pf);
+	ice_deinit_hw(&pf->hw);
+	ice_devlink_unregister(pf);
+
+	/* Issue a PFR as part of the prescribed driver unload flow.  Do not
+	 * do it via ice_schedule_reset() since there is no need to rebuild
+	 * and the service task is already stopped.
+	 */
+	ice_reset(&pf->hw, ICE_RESET_PFR);
+	pci_wait_for_pending_transaction(pdev);
+	ice_clear_interrupt_scheme(pf);
+	pci_disable_pcie_error_reporting(pdev);
+	pci_disable_device(pdev);
+}
+
+/**
+ * ice_shutdown - PCI callback for shutting down device
+ * @pdev: PCI device information struct
+ */
+static void ice_shutdown(struct pci_dev *pdev)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+
+	ice_remove(pdev);
+
+	if (system_state == SYSTEM_POWER_OFF) {
+		pci_wake_from_d3(pdev, pf->wol_ena);
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
+}
+
+#ifdef CONFIG_PM
+/**
+ * ice_prepare_for_shutdown - prep for PCI shutdown
+ * @pf: board private structure
+ *
+ * Inform or close all dependent features in prep for PCI device shutdown
+ */
+static void ice_prepare_for_shutdown(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	u32 v;
+
+	/* Notify VFs of impending reset */
+	if (ice_check_sq_alive(hw, &hw->mailboxq))
+		ice_vc_notify_reset(pf);
+
+	dev_dbg(ice_pf_to_dev(pf), "Tearing down internal switch for shutdown\n");
+
+	/* disable the VSIs and their queues that are not already DOWN */
+	ice_pf_dis_all_vsi(pf, false);
+
+	ice_for_each_vsi(pf, v)
+		if (pf->vsi[v])
+			pf->vsi[v]->vsi_num = 0;
+
+	ice_shutdown_all_ctrlq(hw);
+}
+
+/**
+ * ice_reinit_interrupt_scheme - Reinitialize interrupt scheme
+ * @pf: board private structure to reinitialize
+ *
+ * This routine reinitialize interrupt scheme that was cleared during
+ * power management suspend callback.
+ *
+ * This should be called during resume routine to re-allocate the q_vectors
+ * and reacquire interrupts.
+ */
+static int ice_reinit_interrupt_scheme(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	int ret, v;
+
+	/* Since we clear MSIX flag during suspend, we need to
+	 * set it back during resume...
+	 */
+
+	ret = ice_init_interrupt_scheme(pf);
+	if (ret) {
+		dev_err(dev, "Failed to re-initialize interrupt %d\n", ret);
+		return ret;
+	}
+
+	/* Remap vectors and rings, after successful re-init interrupts */
+	ice_for_each_vsi(pf, v) {
+		if (!pf->vsi[v])
+			continue;
+
+		ret = ice_vsi_alloc_q_vectors(pf->vsi[v]);
+		if (ret)
+			goto err_reinit;
+		ice_vsi_map_rings_to_vectors(pf->vsi[v]);
+	}
+
+	ret = ice_req_irq_msix_misc(pf);
+	if (ret) {
+		dev_err(dev, "Setting up misc vector failed after device suspend %d\n",
+			ret);
+		goto err_reinit;
+	}
+
+	return 0;
+
+err_reinit:
+	while (v--)
+		if (pf->vsi[v])
+			ice_vsi_free_q_vectors(pf->vsi[v]);
+
+	return ret;
+}
+
+/**
+ * ice_suspend
+ * @dev: generic device information structure
+ *
+ * Power Management callback to quiesce the device and prepare
+ * for D3 transition.
+ */
+static int __maybe_unused ice_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct ice_pf *pf;
+	int disabled, v;
+
+	pf = pci_get_drvdata(pdev);
+
+	if (!ice_pf_state_is_nominal(pf)) {
+		dev_err(dev, "Device is not ready, no need to suspend it\n");
+		return -EBUSY;
+	}
+
+	/* Stop watchdog tasks until resume completion.
+	 * Even though it is most likely that the service task is
+	 * disabled if the device is suspended or down, the service task's
+	 * state is controlled by a different state bit, and we should
+	 * store and honor whatever state that bit is in at this point.
+	 */
+	disabled = ice_service_task_stop(pf);
+
+	/* Already suspended?, then there is nothing to do */
+	if (test_and_set_bit(__ICE_SUSPENDED, pf->state)) {
+		if (!disabled)
+			ice_service_task_restart(pf);
+		return 0;
+	}
+
+	if (test_bit(__ICE_DOWN, pf->state) ||
+	    ice_is_reset_in_progress(pf->state)) {
+		dev_err(dev, "can't suspend device in reset or already down\n");
+		if (!disabled)
+			ice_service_task_restart(pf);
+		return 0;
+	}
+
+	ice_setup_mc_magic_wake(pf);
+
+	ice_prepare_for_shutdown(pf);
+
+	ice_set_wake(pf);
+
+	/* Free vectors, clear the interrupt scheme and release IRQs
+	 * for proper hibernation, especially with large number of CPUs.
+	 * Otherwise hibernation might fail when mapping all the vectors back
+	 * to CPU0.
+	 */
+	ice_free_irq_msix_misc(pf);
+	ice_for_each_vsi(pf, v) {
+		if (!pf->vsi[v])
+			continue;
+		ice_vsi_free_q_vectors(pf->vsi[v]);
+	}
+	ice_free_cpu_rx_rmap(ice_get_main_vsi(pf));
+	ice_clear_interrupt_scheme(pf);
+
+	pci_save_state(pdev);
+	pci_wake_from_d3(pdev, pf->wol_ena);
+	pci_set_power_state(pdev, PCI_D3hot);
+	return 0;
+}
+
+/**
+ * ice_resume - PM callback for waking up from D3
+ * @dev: generic device information structure
+ */
+static int __maybe_unused ice_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	enum ice_reset_req reset_type;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	int ret;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	pci_save_state(pdev);
+
+	if (!pci_device_is_present(pdev))
+		return -ENODEV;
+
+	ret = pci_enable_device_mem(pdev);
+	if (ret) {
+		dev_err(dev, "Cannot enable device after suspend\n");
+		return ret;
+	}
+
+	pf = pci_get_drvdata(pdev);
+	hw = &pf->hw;
+
+	pf->wakeup_reason = rd32(hw, PFPM_WUS);
+	ice_print_wake_reason(pf);
+
+	/* We cleared the interrupt scheme when we suspended, so we need to
+	 * restore it now to resume device functionality.
+	 */
+	ret = ice_reinit_interrupt_scheme(pf);
+	if (ret)
+		dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret);
+
+	clear_bit(__ICE_DOWN, pf->state);
+	/* Now perform PF reset and rebuild */
+	reset_type = ICE_RESET_PFR;
+	/* re-enable service task for reset, but allow reset to schedule it */
+	clear_bit(__ICE_SERVICE_DIS, pf->state);
+
+	if (ice_schedule_reset(pf, reset_type))
+		dev_err(dev, "Reset during resume failed.\n");
+
+	clear_bit(__ICE_SUSPENDED, pf->state);
+	ice_service_task_restart(pf);
+
+	/* Restart the service task */
+	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
+
+	return 0;
+}
+#endif /* CONFIG_PM */
+
+/**
+ * ice_pci_err_detected - warning that PCI error has been detected
+ * @pdev: PCI device information struct
+ * @err: the type of PCI error
+ *
+ * Called to warn that something happened on the PCI bus and the error handling
+ * is in progress.  Allows the driver to gracefully prepare/handle PCI errors.
+ */
+static pci_ers_result_t
+ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+
+	if (!pf) {
+		dev_err(&pdev->dev, "%s: unrecoverable device error %d\n",
+			__func__, err);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	if (!test_bit(__ICE_SUSPENDED, pf->state)) {
+		ice_service_task_stop(pf);
+
+		if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) {
+			set_bit(__ICE_PFR_REQ, pf->state);
+			ice_prepare_for_reset(pf);
+		}
+	}
+
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * ice_pci_err_slot_reset - a PCI slot reset has just happened
+ * @pdev: PCI device information struct
+ *
+ * Called to determine if the driver can recover from the PCI slot reset by
+ * using a register read to determine if the device is recoverable.
+ */
+static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+	pci_ers_result_t result;
+	int err;
+	u32 reg;
+
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Cannot re-enable PCI device after reset, error %d\n",
+			err);
+		result = PCI_ERS_RESULT_DISCONNECT;
+	} else {
+		pci_set_master(pdev);
+		pci_restore_state(pdev);
+		pci_save_state(pdev);
+		pci_wake_from_d3(pdev, false);
+
+		/* Check for life */
+		reg = rd32(&pf->hw, GLGEN_RTRIG);
+		if (!reg)
+			result = PCI_ERS_RESULT_RECOVERED;
+		else
+			result = PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	err = pci_aer_clear_nonfatal_status(pdev);
+	if (err)
+		dev_dbg(&pdev->dev, "pci_aer_clear_nonfatal_status() failed, error %d\n",
+			err);
+		/* non-fatal, continue */
+
+	return result;
+}
+
+/**
+ * ice_pci_err_resume - restart operations after PCI error recovery
+ * @pdev: PCI device information struct
+ *
+ * Called to allow the driver to bring things back up after PCI error and/or
+ * reset recovery have finished
+ */
+static void ice_pci_err_resume(struct pci_dev *pdev)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+
+	if (!pf) {
+		dev_err(&pdev->dev, "%s failed, device is unrecoverable\n",
+			__func__);
+		return;
+	}
+
+	if (test_bit(__ICE_SUSPENDED, pf->state)) {
+		dev_dbg(&pdev->dev, "%s failed to resume normal operations!\n",
+			__func__);
+		return;
+	}
+
+	ice_restore_all_vfs_msi_state(pdev);
+
+	ice_do_reset(pf, ICE_RESET_PFR);
+	ice_service_task_restart(pf);
+	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
+}
+
+/**
+ * ice_pci_err_reset_prepare - prepare device driver for PCI reset
+ * @pdev: PCI device information struct
+ */
+static void ice_pci_err_reset_prepare(struct pci_dev *pdev)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+
+	if (!test_bit(__ICE_SUSPENDED, pf->state)) {
+		ice_service_task_stop(pf);
+
+		if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) {
+			set_bit(__ICE_PFR_REQ, pf->state);
+			ice_prepare_for_reset(pf);
+		}
+	}
+}
+
+/**
+ * ice_pci_err_reset_done - PCI reset done, device driver reset can begin
+ * @pdev: PCI device information struct
+ */
+static void ice_pci_err_reset_done(struct pci_dev *pdev)
+{
+	ice_pci_err_resume(pdev);
+}
+
+/* ice_pci_tbl - PCI Device ID Table
+ *
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ *   Class, Class Mask, private data (not used) }
+ */
+static const struct pci_device_id ice_pci_tbl[] = {
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_BACKPLANE), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_QSFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_SFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_BACKPLANE), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_QSFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_10G_BASE_T), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SGMII), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_BACKPLANE), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_QSFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_10G_BASE_T), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SGMII), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_BACKPLANE), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_10G_BASE_T), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SGMII), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_BACKPLANE), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_SFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_10G_BASE_T), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_1GBE), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_QSFP), 0 },
+	/* required last entry */
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, ice_pci_tbl);
+
+static __maybe_unused SIMPLE_DEV_PM_OPS(ice_pm_ops, ice_suspend, ice_resume);
+
+static const struct pci_error_handlers ice_pci_err_handler = {
+	.error_detected = ice_pci_err_detected,
+	.slot_reset = ice_pci_err_slot_reset,
+	.reset_prepare = ice_pci_err_reset_prepare,
+	.reset_done = ice_pci_err_reset_done,
+	.resume = ice_pci_err_resume
+};
+
+static struct pci_driver ice_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = ice_pci_tbl,
+	.probe = ice_probe,
+	.remove = ice_remove,
+#ifdef CONFIG_PM
+	.driver.pm = &ice_pm_ops,
+#endif /* CONFIG_PM */
+	.shutdown = ice_shutdown,
+	.sriov_configure = ice_sriov_configure,
+	.err_handler = &ice_pci_err_handler
+};
+
+/**
+ * ice_module_init - Driver registration routine
+ *
+ * ice_module_init is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ */
+static int __init ice_module_init(void)
+{
+	int status;
+
+	pr_info("%s\n", ice_driver_string);
+	pr_info("%s\n", ice_copyright);
+
+	ice_wq = alloc_workqueue("%s", 0, 0, KBUILD_MODNAME);
+	if (!ice_wq) {
+		pr_err("Failed to create workqueue\n");
+		return -ENOMEM;
+	}
+
+	status = pci_register_driver(&ice_driver);
+	if (status) {
+		pr_err("failed to register PCI driver, err %d\n", status);
+		destroy_workqueue(ice_wq);
+	}
+
+	return status;
+}
+module_init(ice_module_init);
+
+/**
+ * ice_module_exit - Driver exit cleanup routine
+ *
+ * ice_module_exit is called just before the driver is removed
+ * from memory.
+ */
+static void __exit ice_module_exit(void)
+{
+	pci_unregister_driver(&ice_driver);
+	destroy_workqueue(ice_wq);
+	pr_info("module unloaded\n");
+}
+module_exit(ice_module_exit);
+
+/**
+ * ice_set_mac_address - NDO callback to set MAC address
+ * @netdev: network interface device structure
+ * @pi: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_set_mac_address(struct net_device *netdev, void *pi)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	struct sockaddr *addr = pi;
+	enum ice_status status;
+	u8 old_mac[ETH_ALEN];
+	u8 flags = 0;
+	int err = 0;
+	u8 *mac;
+
+	mac = (u8 *)addr->sa_data;
+
+	if (!is_valid_ether_addr(mac))
+		return -EADDRNOTAVAIL;
+
+	if (ether_addr_equal(netdev->dev_addr, mac)) {
+		netdev_dbg(netdev, "already using mac %pM\n", mac);
+		return 0;
+	}
+
+	if (test_bit(__ICE_DOWN, pf->state) ||
+	    ice_is_reset_in_progress(pf->state)) {
+		netdev_err(netdev, "can't set mac %pM. device not ready\n",
+			   mac);
+		return -EBUSY;
+	}
+
+	netif_addr_lock_bh(netdev);
+	ether_addr_copy(old_mac, netdev->dev_addr);
+	/* change the netdev's MAC address */
+	memcpy(netdev->dev_addr, mac, netdev->addr_len);
+	netif_addr_unlock_bh(netdev);
+
+	/* Clean up old MAC filter. Not an error if old filter doesn't exist */
+	status = ice_fltr_remove_mac(vsi, old_mac, ICE_FWD_TO_VSI);
+	if (status && status != ICE_ERR_DOES_NOT_EXIST) {
+		err = -EADDRNOTAVAIL;
+		goto err_update_filters;
+	}
+
+	/* Add filter for new MAC. If filter exists, return success */
+	status = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI);
+	if (status == ICE_ERR_ALREADY_EXISTS)
+		/* Although this MAC filter is already present in hardware it's
+		 * possible in some cases (e.g. bonding) that dev_addr was
+		 * modified outside of the driver and needs to be restored back
+		 * to this value.
+		 */
+		netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac);
+	else if (status)
+		/* error if the new filter addition failed */
+		err = -EADDRNOTAVAIL;
+
+err_update_filters:
+	if (err) {
+		netdev_err(netdev, "can't set MAC %pM. filter update failed\n",
+			   mac);
+		netif_addr_lock_bh(netdev);
+		ether_addr_copy(netdev->dev_addr, old_mac);
+		netif_addr_unlock_bh(netdev);
+		return err;
+	}
+
+	netdev_dbg(vsi->netdev, "updated MAC address to %pM\n",
+		   netdev->dev_addr);
+
+	/* write new MAC address to the firmware */
+	flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL;
+	status = ice_aq_manage_mac_write(hw, mac, flags, NULL);
+	if (status) {
+		netdev_err(netdev, "can't set MAC %pM. write to firmware failed error %s\n",
+			   mac, ice_stat_str(status));
+	}
+	return 0;
+}
+
+/**
+ * ice_set_rx_mode - NDO callback to set the netdev filters
+ * @netdev: network interface device structure
+ */
+static void ice_set_rx_mode(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (!vsi)
+		return;
+
+	/* Set the flags to synchronize filters
+	 * ndo_set_rx_mode may be triggered even without a change in netdev
+	 * flags
+	 */
+	set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
+	set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
+	set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags);
+
+	/* schedule our worker thread which will take care of
+	 * applying the new filter changes
+	 */
+	ice_service_task_schedule(vsi->back);
+}
+
+/**
+ * ice_set_tx_maxrate - NDO callback to set the maximum per-queue bitrate
+ * @netdev: network interface device structure
+ * @queue_index: Queue ID
+ * @maxrate: maximum bandwidth in Mbps
+ */
+static int
+ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	enum ice_status status;
+	u16 q_handle;
+	u8 tc;
+
+	/* Validate maxrate requested is within permitted range */
+	if (maxrate && (maxrate > (ICE_SCHED_MAX_BW / 1000))) {
+		netdev_err(netdev, "Invalid max rate %d specified for the queue %d\n",
+			   maxrate, queue_index);
+		return -EINVAL;
+	}
+
+	q_handle = vsi->tx_rings[queue_index]->q_handle;
+	tc = ice_dcb_get_tc(vsi, queue_index);
+
+	/* Set BW back to default, when user set maxrate to 0 */
+	if (!maxrate)
+		status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc,
+					       q_handle, ICE_MAX_BW);
+	else
+		status = ice_cfg_q_bw_lmt(vsi->port_info, vsi->idx, tc,
+					  q_handle, ICE_MAX_BW, maxrate * 1000);
+	if (status) {
+		netdev_err(netdev, "Unable to set Tx max rate, error %s\n",
+			   ice_stat_str(status));
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_fdb_add - add an entry to the hardware database
+ * @ndm: the input from the stack
+ * @tb: pointer to array of nladdr (unused)
+ * @dev: the net device pointer
+ * @addr: the MAC address entry being added
+ * @vid: VLAN ID
+ * @flags: instructions from stack about fdb operation
+ * @extack: netlink extended ack
+ */
+static int
+ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[],
+	    struct net_device *dev, const unsigned char *addr, u16 vid,
+	    u16 flags, struct netlink_ext_ack __always_unused *extack)
+{
+	int err;
+
+	if (vid) {
+		netdev_err(dev, "VLANs aren't supported yet for dev_uc|mc_add()\n");
+		return -EINVAL;
+	}
+	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
+		netdev_err(dev, "FDB only supports static addresses\n");
+		return -EINVAL;
+	}
+
+	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
+		err = dev_uc_add_excl(dev, addr);
+	else if (is_multicast_ether_addr(addr))
+		err = dev_mc_add_excl(dev, addr);
+	else
+		err = -EINVAL;
+
+	/* Only return duplicate errors if NLM_F_EXCL is set */
+	if (err == -EEXIST && !(flags & NLM_F_EXCL))
+		err = 0;
+
+	return err;
+}
+
+/**
+ * ice_fdb_del - delete an entry from the hardware database
+ * @ndm: the input from the stack
+ * @tb: pointer to array of nladdr (unused)
+ * @dev: the net device pointer
+ * @addr: the MAC address entry being added
+ * @vid: VLAN ID
+ */
+static int
+ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[],
+	    struct net_device *dev, const unsigned char *addr,
+	    __always_unused u16 vid)
+{
+	int err;
+
+	if (ndm->ndm_state & NUD_PERMANENT) {
+		netdev_err(dev, "FDB only supports static addresses\n");
+		return -EINVAL;
+	}
+
+	if (is_unicast_ether_addr(addr))
+		err = dev_uc_del(dev, addr);
+	else if (is_multicast_ether_addr(addr))
+		err = dev_mc_del(dev, addr);
+	else
+		err = -EINVAL;
+
+	return err;
+}
+
+/**
+ * ice_set_features - set the netdev feature flags
+ * @netdev: ptr to the netdev being adjusted
+ * @features: the feature set that the stack is suggesting
+ */
+static int
+ice_set_features(struct net_device *netdev, netdev_features_t features)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	int ret = 0;
+
+	/* Don't set any netdev advanced features with device in Safe Mode */
+	if (ice_is_safe_mode(vsi->back)) {
+		dev_err(ice_pf_to_dev(vsi->back), "Device is in Safe Mode - not enabling advanced netdev features\n");
+		return ret;
+	}
+
+	/* Do not change setting during reset */
+	if (ice_is_reset_in_progress(pf->state)) {
+		dev_err(ice_pf_to_dev(vsi->back), "Device is resetting, changing advanced netdev features temporarily unavailable.\n");
+		return -EBUSY;
+	}
+
+	/* Multiple features can be changed in one call so keep features in
+	 * separate if/else statements to guarantee each feature is checked
+	 */
+	if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH))
+		ret = ice_vsi_manage_rss_lut(vsi, true);
+	else if (!(features & NETIF_F_RXHASH) &&
+		 netdev->features & NETIF_F_RXHASH)
+		ret = ice_vsi_manage_rss_lut(vsi, false);
+
+	if ((features & NETIF_F_HW_VLAN_CTAG_RX) &&
+	    !(netdev->features & NETIF_F_HW_VLAN_CTAG_RX))
+		ret = ice_vsi_manage_vlan_stripping(vsi, true);
+	else if (!(features & NETIF_F_HW_VLAN_CTAG_RX) &&
+		 (netdev->features & NETIF_F_HW_VLAN_CTAG_RX))
+		ret = ice_vsi_manage_vlan_stripping(vsi, false);
+
+	if ((features & NETIF_F_HW_VLAN_CTAG_TX) &&
+	    !(netdev->features & NETIF_F_HW_VLAN_CTAG_TX))
+		ret = ice_vsi_manage_vlan_insertion(vsi);
+	else if (!(features & NETIF_F_HW_VLAN_CTAG_TX) &&
+		 (netdev->features & NETIF_F_HW_VLAN_CTAG_TX))
+		ret = ice_vsi_manage_vlan_insertion(vsi);
+
+	if ((features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
+	    !(netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+		ret = ice_cfg_vlan_pruning(vsi, true, false);
+	else if (!(features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
+		 (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+		ret = ice_cfg_vlan_pruning(vsi, false, false);
+
+	if ((features & NETIF_F_NTUPLE) &&
+	    !(netdev->features & NETIF_F_NTUPLE)) {
+		ice_vsi_manage_fdir(vsi, true);
+		ice_init_arfs(vsi);
+	} else if (!(features & NETIF_F_NTUPLE) &&
+		 (netdev->features & NETIF_F_NTUPLE)) {
+		ice_vsi_manage_fdir(vsi, false);
+		ice_clear_arfs(vsi);
+	}
+
+	return ret;
+}
+
+/**
+ * ice_vsi_vlan_setup - Setup VLAN offload properties on a VSI
+ * @vsi: VSI to setup VLAN properties for
+ */
+static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
+{
+	int ret = 0;
+
+	if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
+		ret = ice_vsi_manage_vlan_stripping(vsi, true);
+	if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)
+		ret = ice_vsi_manage_vlan_insertion(vsi);
+
+	return ret;
+}
+
+/**
+ * ice_vsi_cfg - Setup the VSI
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and negative value on error
+ */
+int ice_vsi_cfg(struct ice_vsi *vsi)
+{
+	int err;
+
+	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
+		ice_set_rx_mode(vsi->netdev);
+
+		err = ice_vsi_vlan_setup(vsi);
+		if (err)
+			return err;
+	}
+	ice_vsi_cfg_dcb_rings(vsi);
+
+	err = ice_vsi_cfg_lan_txqs(vsi);
+	if (!err && ice_is_xdp_ena_vsi(vsi))
+		err = ice_vsi_cfg_xdp_txqs(vsi);
+	if (!err)
+		err = ice_vsi_cfg_rxqs(vsi);
+
+	return err;
+}
+
+/**
+ * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI
+ * @vsi: the VSI being configured
+ */
+static void ice_napi_enable_all(struct ice_vsi *vsi)
+{
+	int q_idx;
+
+	if (!vsi->netdev)
+		return;
+
+	ice_for_each_q_vector(vsi, q_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+		if (q_vector->rx.ring || q_vector->tx.ring)
+			napi_enable(&q_vector->napi);
+	}
+}
+
+/**
+ * ice_up_complete - Finish the last steps of bringing up a connection
+ * @vsi: The VSI being configured
+ *
+ * Return 0 on success and negative value on error
+ */
+static int ice_up_complete(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int err;
+
+	ice_vsi_cfg_msix(vsi);
+
+	/* Enable only Rx rings, Tx rings were enabled by the FW when the
+	 * Tx queue group list was configured and the context bits were
+	 * programmed using ice_vsi_cfg_txqs
+	 */
+	err = ice_vsi_start_all_rx_rings(vsi);
+	if (err)
+		return err;
+
+	clear_bit(__ICE_DOWN, vsi->state);
+	ice_napi_enable_all(vsi);
+	ice_vsi_ena_irq(vsi);
+
+	if (vsi->port_info &&
+	    (vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) &&
+	    vsi->netdev && vsi->type == ICE_VSI_PF) {
+		ice_print_link_msg(vsi, true);
+		netif_tx_start_all_queues(vsi->netdev);
+		netif_carrier_on(vsi->netdev);
+	}
+
+	/* Perform an initial read of the statistics registers now to
+	 * set the baseline so counters are ready when interface is up
+	 */
+	ice_update_eth_stats(vsi);
+
+	if (vsi->type == ICE_VSI_PF)
+		ice_service_task_schedule(pf);
+
+	return 0;
+}
+
+/**
+ * ice_up - Bring the connection back up after being down
+ * @vsi: VSI being configured
+ */
+int ice_up(struct ice_vsi *vsi)
+{
+	int err;
+
+	err = ice_vsi_cfg(vsi);
+	if (!err)
+		err = ice_up_complete(vsi);
+
+	return err;
+}
+
+/**
+ * ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring
+ * @ring: Tx or Rx ring to read stats from
+ * @pkts: packets stats counter
+ * @bytes: bytes stats counter
+ *
+ * This function fetches stats from the ring considering the atomic operations
+ * that needs to be performed to read u64 values in 32 bit machine.
+ */
+static void
+ice_fetch_u64_stats_per_ring(struct ice_ring *ring, u64 *pkts, u64 *bytes)
+{
+	unsigned int start;
+	*pkts = 0;
+	*bytes = 0;
+
+	if (!ring)
+		return;
+	do {
+		start = u64_stats_fetch_begin_irq(&ring->syncp);
+		*pkts = ring->stats.pkts;
+		*bytes = ring->stats.bytes;
+	} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
+}
+
+/**
+ * ice_update_vsi_tx_ring_stats - Update VSI Tx ring stats counters
+ * @vsi: the VSI to be updated
+ * @rings: rings to work on
+ * @count: number of rings
+ */
+static void
+ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, struct ice_ring **rings,
+			     u16 count)
+{
+	struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats;
+	u16 i;
+
+	for (i = 0; i < count; i++) {
+		struct ice_ring *ring;
+		u64 pkts, bytes;
+
+		ring = READ_ONCE(rings[i]);
+		ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes);
+		vsi_stats->tx_packets += pkts;
+		vsi_stats->tx_bytes += bytes;
+		vsi->tx_restart += ring->tx_stats.restart_q;
+		vsi->tx_busy += ring->tx_stats.tx_busy;
+		vsi->tx_linearize += ring->tx_stats.tx_linearize;
+	}
+}
+
+/**
+ * ice_update_vsi_ring_stats - Update VSI stats counters
+ * @vsi: the VSI to be updated
+ */
+static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
+{
+	struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats;
+	struct ice_ring *ring;
+	u64 pkts, bytes;
+	int i;
+
+	/* reset netdev stats */
+	vsi_stats->tx_packets = 0;
+	vsi_stats->tx_bytes = 0;
+	vsi_stats->rx_packets = 0;
+	vsi_stats->rx_bytes = 0;
+
+	/* reset non-netdev (extended) stats */
+	vsi->tx_restart = 0;
+	vsi->tx_busy = 0;
+	vsi->tx_linearize = 0;
+	vsi->rx_buf_failed = 0;
+	vsi->rx_page_failed = 0;
+	vsi->rx_gro_dropped = 0;
+
+	rcu_read_lock();
+
+	/* update Tx rings counters */
+	ice_update_vsi_tx_ring_stats(vsi, vsi->tx_rings, vsi->num_txq);
+
+	/* update Rx rings counters */
+	ice_for_each_rxq(vsi, i) {
+		ring = READ_ONCE(vsi->rx_rings[i]);
+		ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes);
+		vsi_stats->rx_packets += pkts;
+		vsi_stats->rx_bytes += bytes;
+		vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed;
+		vsi->rx_page_failed += ring->rx_stats.alloc_page_failed;
+		vsi->rx_gro_dropped += ring->rx_stats.gro_dropped;
+	}
+
+	/* update XDP Tx rings counters */
+	if (ice_is_xdp_ena_vsi(vsi))
+		ice_update_vsi_tx_ring_stats(vsi, vsi->xdp_rings,
+					     vsi->num_xdp_txq);
+
+	rcu_read_unlock();
+}
+
+/**
+ * ice_update_vsi_stats - Update VSI stats counters
+ * @vsi: the VSI to be updated
+ */
+void ice_update_vsi_stats(struct ice_vsi *vsi)
+{
+	struct rtnl_link_stats64 *cur_ns = &vsi->net_stats;
+	struct ice_eth_stats *cur_es = &vsi->eth_stats;
+	struct ice_pf *pf = vsi->back;
+
+	if (test_bit(__ICE_DOWN, vsi->state) ||
+	    test_bit(__ICE_CFG_BUSY, pf->state))
+		return;
+
+	/* get stats as recorded by Tx/Rx rings */
+	ice_update_vsi_ring_stats(vsi);
+
+	/* get VSI stats as recorded by the hardware */
+	ice_update_eth_stats(vsi);
+
+	cur_ns->tx_errors = cur_es->tx_errors;
+	cur_ns->rx_dropped = cur_es->rx_discards + vsi->rx_gro_dropped;
+	cur_ns->tx_dropped = cur_es->tx_discards;
+	cur_ns->multicast = cur_es->rx_multicast;
+
+	/* update some more netdev stats if this is main VSI */
+	if (vsi->type == ICE_VSI_PF) {
+		cur_ns->rx_crc_errors = pf->stats.crc_errors;
+		cur_ns->rx_errors = pf->stats.crc_errors +
+				    pf->stats.illegal_bytes +
+				    pf->stats.rx_len_errors +
+				    pf->stats.rx_undersize +
+				    pf->hw_csum_rx_error +
+				    pf->stats.rx_jabber +
+				    pf->stats.rx_fragments +
+				    pf->stats.rx_oversize;
+		cur_ns->rx_length_errors = pf->stats.rx_len_errors;
+		/* record drops from the port level */
+		cur_ns->rx_missed_errors = pf->stats.eth.rx_discards;
+	}
+}
+
+/**
+ * ice_update_pf_stats - Update PF port stats counters
+ * @pf: PF whose stats needs to be updated
+ */
+void ice_update_pf_stats(struct ice_pf *pf)
+{
+	struct ice_hw_port_stats *prev_ps, *cur_ps;
+	struct ice_hw *hw = &pf->hw;
+	u16 fd_ctr_base;
+	u8 port;
+
+	port = hw->port_info->lport;
+	prev_ps = &pf->stats_prev;
+	cur_ps = &pf->stats;
+
+	ice_stat_update40(hw, GLPRT_GORCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.rx_bytes,
+			  &cur_ps->eth.rx_bytes);
+
+	ice_stat_update40(hw, GLPRT_UPRCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.rx_unicast,
+			  &cur_ps->eth.rx_unicast);
+
+	ice_stat_update40(hw, GLPRT_MPRCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.rx_multicast,
+			  &cur_ps->eth.rx_multicast);
+
+	ice_stat_update40(hw, GLPRT_BPRCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.rx_broadcast,
+			  &cur_ps->eth.rx_broadcast);
+
+	ice_stat_update32(hw, PRTRPB_RDPC, pf->stat_prev_loaded,
+			  &prev_ps->eth.rx_discards,
+			  &cur_ps->eth.rx_discards);
+
+	ice_stat_update40(hw, GLPRT_GOTCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.tx_bytes,
+			  &cur_ps->eth.tx_bytes);
+
+	ice_stat_update40(hw, GLPRT_UPTCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.tx_unicast,
+			  &cur_ps->eth.tx_unicast);
+
+	ice_stat_update40(hw, GLPRT_MPTCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.tx_multicast,
+			  &cur_ps->eth.tx_multicast);
+
+	ice_stat_update40(hw, GLPRT_BPTCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.tx_broadcast,
+			  &cur_ps->eth.tx_broadcast);
+
+	ice_stat_update32(hw, GLPRT_TDOLD(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_dropped_link_down,
+			  &cur_ps->tx_dropped_link_down);
+
+	ice_stat_update40(hw, GLPRT_PRC64L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_64, &cur_ps->rx_size_64);
+
+	ice_stat_update40(hw, GLPRT_PRC127L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_127, &cur_ps->rx_size_127);
+
+	ice_stat_update40(hw, GLPRT_PRC255L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_255, &cur_ps->rx_size_255);
+
+	ice_stat_update40(hw, GLPRT_PRC511L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_511, &cur_ps->rx_size_511);
+
+	ice_stat_update40(hw, GLPRT_PRC1023L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_1023, &cur_ps->rx_size_1023);
+
+	ice_stat_update40(hw, GLPRT_PRC1522L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_1522, &cur_ps->rx_size_1522);
+
+	ice_stat_update40(hw, GLPRT_PRC9522L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_big, &cur_ps->rx_size_big);
+
+	ice_stat_update40(hw, GLPRT_PTC64L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_64, &cur_ps->tx_size_64);
+
+	ice_stat_update40(hw, GLPRT_PTC127L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_127, &cur_ps->tx_size_127);
+
+	ice_stat_update40(hw, GLPRT_PTC255L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_255, &cur_ps->tx_size_255);
+
+	ice_stat_update40(hw, GLPRT_PTC511L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_511, &cur_ps->tx_size_511);
+
+	ice_stat_update40(hw, GLPRT_PTC1023L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_1023, &cur_ps->tx_size_1023);
+
+	ice_stat_update40(hw, GLPRT_PTC1522L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_1522, &cur_ps->tx_size_1522);
+
+	ice_stat_update40(hw, GLPRT_PTC9522L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_big, &cur_ps->tx_size_big);
+
+	fd_ctr_base = hw->fd_ctr_base;
+
+	ice_stat_update40(hw,
+			  GLSTAT_FD_CNT0L(ICE_FD_SB_STAT_IDX(fd_ctr_base)),
+			  pf->stat_prev_loaded, &prev_ps->fd_sb_match,
+			  &cur_ps->fd_sb_match);
+	ice_stat_update32(hw, GLPRT_LXONRXC(port), pf->stat_prev_loaded,
+			  &prev_ps->link_xon_rx, &cur_ps->link_xon_rx);
+
+	ice_stat_update32(hw, GLPRT_LXOFFRXC(port), pf->stat_prev_loaded,
+			  &prev_ps->link_xoff_rx, &cur_ps->link_xoff_rx);
+
+	ice_stat_update32(hw, GLPRT_LXONTXC(port), pf->stat_prev_loaded,
+			  &prev_ps->link_xon_tx, &cur_ps->link_xon_tx);
+
+	ice_stat_update32(hw, GLPRT_LXOFFTXC(port), pf->stat_prev_loaded,
+			  &prev_ps->link_xoff_tx, &cur_ps->link_xoff_tx);
+
+	ice_update_dcb_stats(pf);
+
+	ice_stat_update32(hw, GLPRT_CRCERRS(port), pf->stat_prev_loaded,
+			  &prev_ps->crc_errors, &cur_ps->crc_errors);
+
+	ice_stat_update32(hw, GLPRT_ILLERRC(port), pf->stat_prev_loaded,
+			  &prev_ps->illegal_bytes, &cur_ps->illegal_bytes);
+
+	ice_stat_update32(hw, GLPRT_MLFC(port), pf->stat_prev_loaded,
+			  &prev_ps->mac_local_faults,
+			  &cur_ps->mac_local_faults);
+
+	ice_stat_update32(hw, GLPRT_MRFC(port), pf->stat_prev_loaded,
+			  &prev_ps->mac_remote_faults,
+			  &cur_ps->mac_remote_faults);
+
+	ice_stat_update32(hw, GLPRT_RLEC(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_len_errors, &cur_ps->rx_len_errors);
+
+	ice_stat_update32(hw, GLPRT_RUC(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_undersize, &cur_ps->rx_undersize);
+
+	ice_stat_update32(hw, GLPRT_RFC(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_fragments, &cur_ps->rx_fragments);
+
+	ice_stat_update32(hw, GLPRT_ROC(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_oversize, &cur_ps->rx_oversize);
+
+	ice_stat_update32(hw, GLPRT_RJC(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_jabber, &cur_ps->rx_jabber);
+
+	cur_ps->fd_sb_status = test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1 : 0;
+
+	pf->stat_prev_loaded = true;
+}
+
+/**
+ * ice_get_stats64 - get statistics for network device structure
+ * @netdev: network interface device structure
+ * @stats: main device statistics structure
+ */
+static
+void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct rtnl_link_stats64 *vsi_stats;
+	struct ice_vsi *vsi = np->vsi;
+
+	vsi_stats = &vsi->net_stats;
+
+	if (!vsi->num_txq || !vsi->num_rxq)
+		return;
+
+	/* netdev packet/byte stats come from ring counter. These are obtained
+	 * by summing up ring counters (done by ice_update_vsi_ring_stats).
+	 * But, only call the update routine and read the registers if VSI is
+	 * not down.
+	 */
+	if (!test_bit(__ICE_DOWN, vsi->state))
+		ice_update_vsi_ring_stats(vsi);
+	stats->tx_packets = vsi_stats->tx_packets;
+	stats->tx_bytes = vsi_stats->tx_bytes;
+	stats->rx_packets = vsi_stats->rx_packets;
+	stats->rx_bytes = vsi_stats->rx_bytes;
+
+	/* The rest of the stats can be read from the hardware but instead we
+	 * just return values that the watchdog task has already obtained from
+	 * the hardware.
+	 */
+	stats->multicast = vsi_stats->multicast;
+	stats->tx_errors = vsi_stats->tx_errors;
+	stats->tx_dropped = vsi_stats->tx_dropped;
+	stats->rx_errors = vsi_stats->rx_errors;
+	stats->rx_dropped = vsi_stats->rx_dropped;
+	stats->rx_crc_errors = vsi_stats->rx_crc_errors;
+	stats->rx_length_errors = vsi_stats->rx_length_errors;
+}
+
+/**
+ * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI
+ * @vsi: VSI having NAPI disabled
+ */
+static void ice_napi_disable_all(struct ice_vsi *vsi)
+{
+	int q_idx;
+
+	if (!vsi->netdev)
+		return;
+
+	ice_for_each_q_vector(vsi, q_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+		if (q_vector->rx.ring || q_vector->tx.ring)
+			napi_disable(&q_vector->napi);
+	}
+}
+
+/**
+ * ice_down - Shutdown the connection
+ * @vsi: The VSI being stopped
+ */
+int ice_down(struct ice_vsi *vsi)
+{
+	int i, tx_err, rx_err, link_err = 0;
+
+	/* Caller of this function is expected to set the
+	 * vsi->state __ICE_DOWN bit
+	 */
+	if (vsi->netdev) {
+		netif_carrier_off(vsi->netdev);
+		netif_tx_disable(vsi->netdev);
+	}
+
+	ice_vsi_dis_irq(vsi);
+
+	tx_err = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
+	if (tx_err)
+		netdev_err(vsi->netdev, "Failed stop Tx rings, VSI %d error %d\n",
+			   vsi->vsi_num, tx_err);
+	if (!tx_err && ice_is_xdp_ena_vsi(vsi)) {
+		tx_err = ice_vsi_stop_xdp_tx_rings(vsi);
+		if (tx_err)
+			netdev_err(vsi->netdev, "Failed stop XDP rings, VSI %d error %d\n",
+				   vsi->vsi_num, tx_err);
+	}
+
+	rx_err = ice_vsi_stop_all_rx_rings(vsi);
+	if (rx_err)
+		netdev_err(vsi->netdev, "Failed stop Rx rings, VSI %d error %d\n",
+			   vsi->vsi_num, rx_err);
+
+	ice_napi_disable_all(vsi);
+
+	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) {
+		link_err = ice_force_phys_link_state(vsi, false);
+		if (link_err)
+			netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n",
+				   vsi->vsi_num, link_err);
+	}
+
+	ice_for_each_txq(vsi, i)
+		ice_clean_tx_ring(vsi->tx_rings[i]);
+
+	ice_for_each_rxq(vsi, i)
+		ice_clean_rx_ring(vsi->rx_rings[i]);
+
+	if (tx_err || rx_err || link_err) {
+		netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n",
+			   vsi->vsi_num, vsi->vsw->sw_id);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources
+ * @vsi: VSI having resources allocated
+ *
+ * Return 0 on success, negative on failure
+ */
+int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
+{
+	int i, err = 0;
+
+	if (!vsi->num_txq) {
+		dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Tx queues\n",
+			vsi->vsi_num);
+		return -EINVAL;
+	}
+
+	ice_for_each_txq(vsi, i) {
+		struct ice_ring *ring = vsi->tx_rings[i];
+
+		if (!ring)
+			return -EINVAL;
+
+		ring->netdev = vsi->netdev;
+		err = ice_setup_tx_ring(ring);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/**
+ * ice_vsi_setup_rx_rings - Allocate VSI Rx queue resources
+ * @vsi: VSI having resources allocated
+ *
+ * Return 0 on success, negative on failure
+ */
+int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
+{
+	int i, err = 0;
+
+	if (!vsi->num_rxq) {
+		dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Rx queues\n",
+			vsi->vsi_num);
+		return -EINVAL;
+	}
+
+	ice_for_each_rxq(vsi, i) {
+		struct ice_ring *ring = vsi->rx_rings[i];
+
+		if (!ring)
+			return -EINVAL;
+
+		ring->netdev = vsi->netdev;
+		err = ice_setup_rx_ring(ring);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/**
+ * ice_vsi_open_ctrl - open control VSI for use
+ * @vsi: the VSI to open
+ *
+ * Initialization of the Control VSI
+ *
+ * Returns 0 on success, negative value on error
+ */
+int ice_vsi_open_ctrl(struct ice_vsi *vsi)
+{
+	char int_name[ICE_INT_NAME_STR_LEN];
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	int err;
+
+	dev = ice_pf_to_dev(pf);
+	/* allocate descriptors */
+	err = ice_vsi_setup_tx_rings(vsi);
+	if (err)
+		goto err_setup_tx;
+
+	err = ice_vsi_setup_rx_rings(vsi);
+	if (err)
+		goto err_setup_rx;
+
+	err = ice_vsi_cfg(vsi);
+	if (err)
+		goto err_setup_rx;
+
+	snprintf(int_name, sizeof(int_name) - 1, "%s-%s:ctrl",
+		 dev_driver_string(dev), dev_name(dev));
+	err = ice_vsi_req_irq_msix(vsi, int_name);
+	if (err)
+		goto err_setup_rx;
+
+	ice_vsi_cfg_msix(vsi);
+
+	err = ice_vsi_start_all_rx_rings(vsi);
+	if (err)
+		goto err_up_complete;
+
+	clear_bit(__ICE_DOWN, vsi->state);
+	ice_vsi_ena_irq(vsi);
+
+	return 0;
+
+err_up_complete:
+	ice_down(vsi);
+err_setup_rx:
+	ice_vsi_free_rx_rings(vsi);
+err_setup_tx:
+	ice_vsi_free_tx_rings(vsi);
+
+	return err;
+}
+
+/**
+ * ice_vsi_open - Called when a network interface is made active
+ * @vsi: the VSI to open
+ *
+ * Initialization of the VSI
+ *
+ * Returns 0 on success, negative value on error
+ */
+static int ice_vsi_open(struct ice_vsi *vsi)
+{
+	char int_name[ICE_INT_NAME_STR_LEN];
+	struct ice_pf *pf = vsi->back;
+	int err;
+
+	/* allocate descriptors */
+	err = ice_vsi_setup_tx_rings(vsi);
+	if (err)
+		goto err_setup_tx;
+
+	err = ice_vsi_setup_rx_rings(vsi);
+	if (err)
+		goto err_setup_rx;
+
+	err = ice_vsi_cfg(vsi);
+	if (err)
+		goto err_setup_rx;
+
+	snprintf(int_name, sizeof(int_name) - 1, "%s-%s",
+		 dev_driver_string(ice_pf_to_dev(pf)), vsi->netdev->name);
+	err = ice_vsi_req_irq_msix(vsi, int_name);
+	if (err)
+		goto err_setup_rx;
+
+	/* Notify the stack of the actual queue counts. */
+	err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_txq);
+	if (err)
+		goto err_set_qs;
+
+	err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_rxq);
+	if (err)
+		goto err_set_qs;
+
+	err = ice_up_complete(vsi);
+	if (err)
+		goto err_up_complete;
+
+	return 0;
+
+err_up_complete:
+	ice_down(vsi);
+err_set_qs:
+	ice_vsi_free_irq(vsi);
+err_setup_rx:
+	ice_vsi_free_rx_rings(vsi);
+err_setup_tx:
+	ice_vsi_free_tx_rings(vsi);
+
+	return err;
+}
+
+/**
+ * ice_vsi_release_all - Delete all VSIs
+ * @pf: PF from which all VSIs are being removed
+ */
+static void ice_vsi_release_all(struct ice_pf *pf)
+{
+	int err, i;
+
+	if (!pf->vsi)
+		return;
+
+	ice_for_each_vsi(pf, i) {
+		if (!pf->vsi[i])
+			continue;
+
+		err = ice_vsi_release(pf->vsi[i]);
+		if (err)
+			dev_dbg(ice_pf_to_dev(pf), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n",
+				i, err, pf->vsi[i]->vsi_num);
+	}
+}
+
+/**
+ * ice_vsi_rebuild_by_type - Rebuild VSI of a given type
+ * @pf: pointer to the PF instance
+ * @type: VSI type to rebuild
+ *
+ * Iterates through the pf->vsi array and rebuilds VSIs of the requested type
+ */
+static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	enum ice_status status;
+	int i, err;
+
+	ice_for_each_vsi(pf, i) {
+		struct ice_vsi *vsi = pf->vsi[i];
+
+		if (!vsi || vsi->type != type)
+			continue;
+
+		/* rebuild the VSI */
+		err = ice_vsi_rebuild(vsi, true);
+		if (err) {
+			dev_err(dev, "rebuild VSI failed, err %d, VSI index %d, type %s\n",
+				err, vsi->idx, ice_vsi_type_str(type));
+			return err;
+		}
+
+		/* replay filters for the VSI */
+		status = ice_replay_vsi(&pf->hw, vsi->idx);
+		if (status) {
+			dev_err(dev, "replay VSI failed, status %s, VSI index %d, type %s\n",
+				ice_stat_str(status), vsi->idx,
+				ice_vsi_type_str(type));
+			return -EIO;
+		}
+
+		/* Re-map HW VSI number, using VSI handle that has been
+		 * previously validated in ice_replay_vsi() call above
+		 */
+		vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
+
+		/* enable the VSI */
+		err = ice_ena_vsi(vsi, false);
+		if (err) {
+			dev_err(dev, "enable VSI failed, err %d, VSI index %d, type %s\n",
+				err, vsi->idx, ice_vsi_type_str(type));
+			return err;
+		}
+
+		dev_info(dev, "VSI rebuilt. VSI index %d, type %s\n", vsi->idx,
+			 ice_vsi_type_str(type));
+	}
+
+	return 0;
+}
+
+/**
+ * ice_update_pf_netdev_link - Update PF netdev link status
+ * @pf: pointer to the PF instance
+ */
+static void ice_update_pf_netdev_link(struct ice_pf *pf)
+{
+	bool link_up;
+	int i;
+
+	ice_for_each_vsi(pf, i) {
+		struct ice_vsi *vsi = pf->vsi[i];
+
+		if (!vsi || vsi->type != ICE_VSI_PF)
+			return;
+
+		ice_get_link_status(pf->vsi[i]->port_info, &link_up);
+		if (link_up) {
+			netif_carrier_on(pf->vsi[i]->netdev);
+			netif_tx_wake_all_queues(pf->vsi[i]->netdev);
+		} else {
+			netif_carrier_off(pf->vsi[i]->netdev);
+			netif_tx_stop_all_queues(pf->vsi[i]->netdev);
+		}
+	}
+}
+
+/**
+ * ice_rebuild - rebuild after reset
+ * @pf: PF to rebuild
+ * @reset_type: type of reset
+ *
+ * Do not rebuild VF VSI in this flow because that is already handled via
+ * ice_reset_all_vfs(). This is because requirements for resetting a VF after a
+ * PFR/CORER/GLOBER/etc. are different than the normal flow. Also, we don't want
+ * to reset/rebuild all the VF VSI twice.
+ */
+static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status ret;
+	int err;
+
+	if (test_bit(__ICE_DOWN, pf->state))
+		goto clear_recovery;
+
+	dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type);
+
+	ret = ice_init_all_ctrlq(hw);
+	if (ret) {
+		dev_err(dev, "control queues init failed %s\n",
+			ice_stat_str(ret));
+		goto err_init_ctrlq;
+	}
+
+	/* if DDP was previously loaded successfully */
+	if (!ice_is_safe_mode(pf)) {
+		/* reload the SW DB of filter tables */
+		if (reset_type == ICE_RESET_PFR)
+			ice_fill_blk_tbls(hw);
+		else
+			/* Reload DDP Package after CORER/GLOBR reset */
+			ice_load_pkg(NULL, pf);
+	}
+
+	ret = ice_clear_pf_cfg(hw);
+	if (ret) {
+		dev_err(dev, "clear PF configuration failed %s\n",
+			ice_stat_str(ret));
+		goto err_init_ctrlq;
+	}
+
+	if (pf->first_sw->dflt_vsi_ena)
+		dev_info(dev, "Clearing default VSI, re-enable after reset completes\n");
+	/* clear the default VSI configuration if it exists */
+	pf->first_sw->dflt_vsi = NULL;
+	pf->first_sw->dflt_vsi_ena = false;
+
+	ice_clear_pxe_mode(hw);
+
+	ret = ice_get_caps(hw);
+	if (ret) {
+		dev_err(dev, "ice_get_caps failed %s\n", ice_stat_str(ret));
+		goto err_init_ctrlq;
+	}
+
+	ret = ice_aq_set_mac_cfg(hw, ICE_AQ_SET_MAC_FRAME_SIZE_MAX, NULL);
+	if (ret) {
+		dev_err(dev, "set_mac_cfg failed %s\n", ice_stat_str(ret));
+		goto err_init_ctrlq;
+	}
+
+	err = ice_sched_init_port(hw->port_info);
+	if (err)
+		goto err_sched_init_port;
+
+	/* start misc vector */
+	err = ice_req_irq_msix_misc(pf);
+	if (err) {
+		dev_err(dev, "misc vector setup failed: %d\n", err);
+		goto err_sched_init_port;
+	}
+
+	if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
+		wr32(hw, PFQF_FD_ENA, PFQF_FD_ENA_FD_ENA_M);
+		if (!rd32(hw, PFQF_FD_SIZE)) {
+			u16 unused, guar, b_effort;
+
+			guar = hw->func_caps.fd_fltr_guar;
+			b_effort = hw->func_caps.fd_fltr_best_effort;
+
+			/* force guaranteed filter pool for PF */
+			ice_alloc_fd_guar_item(hw, &unused, guar);
+			/* force shared filter pool for PF */
+			ice_alloc_fd_shrd_item(hw, &unused, b_effort);
+		}
+	}
+
+	if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+		ice_dcb_rebuild(pf);
+
+	/* rebuild PF VSI */
+	err = ice_vsi_rebuild_by_type(pf, ICE_VSI_PF);
+	if (err) {
+		dev_err(dev, "PF VSI rebuild failed: %d\n", err);
+		goto err_vsi_rebuild;
+	}
+
+	/* If Flow Director is active */
+	if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
+		err = ice_vsi_rebuild_by_type(pf, ICE_VSI_CTRL);
+		if (err) {
+			dev_err(dev, "control VSI rebuild failed: %d\n", err);
+			goto err_vsi_rebuild;
+		}
+
+		/* replay HW Flow Director recipes */
+		if (hw->fdir_prof)
+			ice_fdir_replay_flows(hw);
+
+		/* replay Flow Director filters */
+		ice_fdir_replay_fltrs(pf);
+
+		ice_rebuild_arfs(pf);
+	}
+
+	ice_update_pf_netdev_link(pf);
+
+	/* tell the firmware we are up */
+	ret = ice_send_version(pf);
+	if (ret) {
+		dev_err(dev, "Rebuild failed due to error sending driver version: %s\n",
+			ice_stat_str(ret));
+		goto err_vsi_rebuild;
+	}
+
+	ice_replay_post(hw);
+
+	/* if we get here, reset flow is successful */
+	clear_bit(__ICE_RESET_FAILED, pf->state);
+	return;
+
+err_vsi_rebuild:
+err_sched_init_port:
+	ice_sched_cleanup_all(hw);
+err_init_ctrlq:
+	ice_shutdown_all_ctrlq(hw);
+	set_bit(__ICE_RESET_FAILED, pf->state);
+clear_recovery:
+	/* set this bit in PF state to control service task scheduling */
+	set_bit(__ICE_NEEDS_RESTART, pf->state);
+	dev_err(dev, "Rebuild failed, unload and reload driver\n");
+}
+
+/**
+ * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
+ * @vsi: Pointer to VSI structure
+ */
+static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
+{
+	if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+		return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM;
+	else
+		return ICE_RXBUF_3072;
+}
+
+/**
+ * ice_change_mtu - NDO callback to change the MTU
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	u8 count = 0;
+
+	if (new_mtu == (int)netdev->mtu) {
+		netdev_warn(netdev, "MTU is already %u\n", netdev->mtu);
+		return 0;
+	}
+
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		int frame_size = ice_max_xdp_frame_size(vsi);
+
+		if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
+			netdev_err(netdev, "max MTU for XDP usage is %d\n",
+				   frame_size - ICE_ETH_PKT_HDR_PAD);
+			return -EINVAL;
+		}
+	}
+
+	if (new_mtu < (int)netdev->min_mtu) {
+		netdev_err(netdev, "new MTU invalid. min_mtu is %d\n",
+			   netdev->min_mtu);
+		return -EINVAL;
+	} else if (new_mtu > (int)netdev->max_mtu) {
+		netdev_err(netdev, "new MTU invalid. max_mtu is %d\n",
+			   netdev->min_mtu);
+		return -EINVAL;
+	}
+	/* if a reset is in progress, wait for some time for it to complete */
+	do {
+		if (ice_is_reset_in_progress(pf->state)) {
+			count++;
+			usleep_range(1000, 2000);
+		} else {
+			break;
+		}
+
+	} while (count < 100);
+
+	if (count == 100) {
+		netdev_err(netdev, "can't change MTU. Device is busy\n");
+		return -EBUSY;
+	}
+
+	netdev->mtu = (unsigned int)new_mtu;
+
+	/* if VSI is up, bring it down and then back up */
+	if (!test_and_set_bit(__ICE_DOWN, vsi->state)) {
+		int err;
+
+		err = ice_down(vsi);
+		if (err) {
+			netdev_err(netdev, "change MTU if_up err %d\n", err);
+			return err;
+		}
+
+		err = ice_up(vsi);
+		if (err) {
+			netdev_err(netdev, "change MTU if_up err %d\n", err);
+			return err;
+		}
+	}
+
+	netdev_dbg(netdev, "changed MTU to %d\n", new_mtu);
+	return 0;
+}
+
+/**
+ * ice_aq_str - convert AQ err code to a string
+ * @aq_err: the AQ error code to convert
+ */
+const char *ice_aq_str(enum ice_aq_err aq_err)
+{
+	switch (aq_err) {
+	case ICE_AQ_RC_OK:
+		return "OK";
+	case ICE_AQ_RC_EPERM:
+		return "ICE_AQ_RC_EPERM";
+	case ICE_AQ_RC_ENOENT:
+		return "ICE_AQ_RC_ENOENT";
+	case ICE_AQ_RC_ENOMEM:
+		return "ICE_AQ_RC_ENOMEM";
+	case ICE_AQ_RC_EBUSY:
+		return "ICE_AQ_RC_EBUSY";
+	case ICE_AQ_RC_EEXIST:
+		return "ICE_AQ_RC_EEXIST";
+	case ICE_AQ_RC_EINVAL:
+		return "ICE_AQ_RC_EINVAL";
+	case ICE_AQ_RC_ENOSPC:
+		return "ICE_AQ_RC_ENOSPC";
+	case ICE_AQ_RC_ENOSYS:
+		return "ICE_AQ_RC_ENOSYS";
+	case ICE_AQ_RC_EMODE:
+		return "ICE_AQ_RC_EMODE";
+	case ICE_AQ_RC_ENOSEC:
+		return "ICE_AQ_RC_ENOSEC";
+	case ICE_AQ_RC_EBADSIG:
+		return "ICE_AQ_RC_EBADSIG";
+	case ICE_AQ_RC_ESVN:
+		return "ICE_AQ_RC_ESVN";
+	case ICE_AQ_RC_EBADMAN:
+		return "ICE_AQ_RC_EBADMAN";
+	case ICE_AQ_RC_EBADBUF:
+		return "ICE_AQ_RC_EBADBUF";
+	}
+
+	return "ICE_AQ_RC_UNKNOWN";
+}
+
+/**
+ * ice_stat_str - convert status err code to a string
+ * @stat_err: the status error code to convert
+ */
+const char *ice_stat_str(enum ice_status stat_err)
+{
+	switch (stat_err) {
+	case ICE_SUCCESS:
+		return "OK";
+	case ICE_ERR_PARAM:
+		return "ICE_ERR_PARAM";
+	case ICE_ERR_NOT_IMPL:
+		return "ICE_ERR_NOT_IMPL";
+	case ICE_ERR_NOT_READY:
+		return "ICE_ERR_NOT_READY";
+	case ICE_ERR_NOT_SUPPORTED:
+		return "ICE_ERR_NOT_SUPPORTED";
+	case ICE_ERR_BAD_PTR:
+		return "ICE_ERR_BAD_PTR";
+	case ICE_ERR_INVAL_SIZE:
+		return "ICE_ERR_INVAL_SIZE";
+	case ICE_ERR_DEVICE_NOT_SUPPORTED:
+		return "ICE_ERR_DEVICE_NOT_SUPPORTED";
+	case ICE_ERR_RESET_FAILED:
+		return "ICE_ERR_RESET_FAILED";
+	case ICE_ERR_FW_API_VER:
+		return "ICE_ERR_FW_API_VER";
+	case ICE_ERR_NO_MEMORY:
+		return "ICE_ERR_NO_MEMORY";
+	case ICE_ERR_CFG:
+		return "ICE_ERR_CFG";
+	case ICE_ERR_OUT_OF_RANGE:
+		return "ICE_ERR_OUT_OF_RANGE";
+	case ICE_ERR_ALREADY_EXISTS:
+		return "ICE_ERR_ALREADY_EXISTS";
+	case ICE_ERR_NVM_CHECKSUM:
+		return "ICE_ERR_NVM_CHECKSUM";
+	case ICE_ERR_BUF_TOO_SHORT:
+		return "ICE_ERR_BUF_TOO_SHORT";
+	case ICE_ERR_NVM_BLANK_MODE:
+		return "ICE_ERR_NVM_BLANK_MODE";
+	case ICE_ERR_IN_USE:
+		return "ICE_ERR_IN_USE";
+	case ICE_ERR_MAX_LIMIT:
+		return "ICE_ERR_MAX_LIMIT";
+	case ICE_ERR_RESET_ONGOING:
+		return "ICE_ERR_RESET_ONGOING";
+	case ICE_ERR_HW_TABLE:
+		return "ICE_ERR_HW_TABLE";
+	case ICE_ERR_DOES_NOT_EXIST:
+		return "ICE_ERR_DOES_NOT_EXIST";
+	case ICE_ERR_FW_DDP_MISMATCH:
+		return "ICE_ERR_FW_DDP_MISMATCH";
+	case ICE_ERR_AQ_ERROR:
+		return "ICE_ERR_AQ_ERROR";
+	case ICE_ERR_AQ_TIMEOUT:
+		return "ICE_ERR_AQ_TIMEOUT";
+	case ICE_ERR_AQ_FULL:
+		return "ICE_ERR_AQ_FULL";
+	case ICE_ERR_AQ_NO_WORK:
+		return "ICE_ERR_AQ_NO_WORK";
+	case ICE_ERR_AQ_EMPTY:
+		return "ICE_ERR_AQ_EMPTY";
+	case ICE_ERR_AQ_FW_CRITICAL:
+		return "ICE_ERR_AQ_FW_CRITICAL";
+	}
+
+	return "ICE_ERR_UNKNOWN";
+}
+
+/**
+ * ice_set_rss - Set RSS keys and lut
+ * @vsi: Pointer to VSI structure
+ * @seed: RSS hash seed
+ * @lut: Lookup table
+ * @lut_size: Lookup table size
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+	if (seed) {
+		struct ice_aqc_get_set_rss_keys *buf =
+				  (struct ice_aqc_get_set_rss_keys *)seed;
+
+		status = ice_aq_set_rss_key(hw, vsi->idx, buf);
+
+		if (status) {
+			dev_err(dev, "Cannot set RSS key, err %s aq_err %s\n",
+				ice_stat_str(status),
+				ice_aq_str(hw->adminq.sq_last_status));
+			return -EIO;
+		}
+	}
+
+	if (lut) {
+		status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type,
+					    lut, lut_size);
+		if (status) {
+			dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n",
+				ice_stat_str(status),
+				ice_aq_str(hw->adminq.sq_last_status));
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_rss - Get RSS keys and lut
+ * @vsi: Pointer to VSI structure
+ * @seed: Buffer to store the keys
+ * @lut: Buffer to store the lookup table entries
+ * @lut_size: Size of buffer to store the lookup table entries
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+	if (seed) {
+		struct ice_aqc_get_set_rss_keys *buf =
+				  (struct ice_aqc_get_set_rss_keys *)seed;
+
+		status = ice_aq_get_rss_key(hw, vsi->idx, buf);
+		if (status) {
+			dev_err(dev, "Cannot get RSS key, err %s aq_err %s\n",
+				ice_stat_str(status),
+				ice_aq_str(hw->adminq.sq_last_status));
+			return -EIO;
+		}
+	}
+
+	if (lut) {
+		status = ice_aq_get_rss_lut(hw, vsi->idx, vsi->rss_lut_type,
+					    lut, lut_size);
+		if (status) {
+			dev_err(dev, "Cannot get RSS lut, err %s aq_err %s\n",
+				ice_stat_str(status),
+				ice_aq_str(hw->adminq.sq_last_status));
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_bridge_getlink - Get the hardware bridge mode
+ * @skb: skb buff
+ * @pid: process ID
+ * @seq: RTNL message seq
+ * @dev: the netdev being configured
+ * @filter_mask: filter mask passed in
+ * @nlflags: netlink flags passed in
+ *
+ * Return the bridge mode (VEB/VEPA)
+ */
+static int
+ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+		   struct net_device *dev, u32 filter_mask, int nlflags)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	u16 bmode;
+
+	bmode = pf->first_sw->bridge_mode;
+
+	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, bmode, 0, 0, nlflags,
+				       filter_mask, NULL);
+}
+
+/**
+ * ice_vsi_update_bridge_mode - Update VSI for switching bridge mode (VEB/VEPA)
+ * @vsi: Pointer to VSI structure
+ * @bmode: Hardware bridge mode (VEB/VEPA)
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
+{
+	struct ice_aqc_vsi_props *vsi_props;
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx *ctxt;
+	enum ice_status status;
+	int ret = 0;
+
+	vsi_props = &vsi->info;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	ctxt->info = vsi->info;
+
+	if (bmode == BRIDGE_MODE_VEB)
+		/* change from VEPA to VEB mode */
+		ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
+	else
+		/* change from VEB to VEPA mode */
+		ctxt->info.sw_flags &= ~ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
+	ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);
+
+	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (status) {
+		dev_err(ice_pf_to_dev(vsi->back), "update VSI for bridge mode failed, bmode = %d err %s aq_err %s\n",
+			bmode, ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		ret = -EIO;
+		goto out;
+	}
+	/* Update sw flags for book keeping */
+	vsi_props->sw_flags = ctxt->info.sw_flags;
+
+out:
+	kfree(ctxt);
+	return ret;
+}
+
+/**
+ * ice_bridge_setlink - Set the hardware bridge mode
+ * @dev: the netdev being configured
+ * @nlh: RTNL message
+ * @flags: bridge setlink flags
+ * @extack: netlink extended ack
+ *
+ * Sets the bridge mode (VEB/VEPA) of the switch to which the netdev (VSI) is
+ * hooked up to. Iterates through the PF VSI list and sets the loopback mode (if
+ * not already set for all VSIs connected to this switch. And also update the
+ * unicast switch filter rules for the corresponding switch of the netdev.
+ */
+static int
+ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
+		   u16 __always_unused flags,
+		   struct netlink_ext_ack __always_unused *extack)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_pf *pf = np->vsi->back;
+	struct nlattr *attr, *br_spec;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	struct ice_sw *pf_sw;
+	int rem, v, err = 0;
+
+	pf_sw = pf->first_sw;
+	/* find the attribute in the netlink message */
+	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+
+	nla_for_each_nested(attr, br_spec, rem) {
+		__u16 mode;
+
+		if (nla_type(attr) != IFLA_BRIDGE_MODE)
+			continue;
+		mode = nla_get_u16(attr);
+		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
+			return -EINVAL;
+		/* Continue  if bridge mode is not being flipped */
+		if (mode == pf_sw->bridge_mode)
+			continue;
+		/* Iterates through the PF VSI list and update the loopback
+		 * mode of the VSI
+		 */
+		ice_for_each_vsi(pf, v) {
+			if (!pf->vsi[v])
+				continue;
+			err = ice_vsi_update_bridge_mode(pf->vsi[v], mode);
+			if (err)
+				return err;
+		}
+
+		hw->evb_veb = (mode == BRIDGE_MODE_VEB);
+		/* Update the unicast switch filter rules for the corresponding
+		 * switch of the netdev
+		 */
+		status = ice_update_sw_rule_bridge_mode(hw);
+		if (status) {
+			netdev_err(dev, "switch rule update failed, mode = %d err %s aq_err %s\n",
+				   mode, ice_stat_str(status),
+				   ice_aq_str(hw->adminq.sq_last_status));
+			/* revert hw->evb_veb */
+			hw->evb_veb = (pf_sw->bridge_mode == BRIDGE_MODE_VEB);
+			return -EIO;
+		}
+
+		pf_sw->bridge_mode = mode;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ * @txqueue: Tx queue
+ */
+static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_ring *tx_ring = NULL;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	u32 i;
+
+	pf->tx_timeout_count++;
+
+	/* Check if PFC is enabled for the TC to which the queue belongs
+	 * to. If yes then Tx timeout is not caused by a hung queue, no
+	 * need to reset and rebuild
+	 */
+	if (ice_is_pfc_causing_hung_q(pf, txqueue)) {
+		dev_info(ice_pf_to_dev(pf), "Fake Tx hang detected on queue %u, timeout caused by PFC storm\n",
+			 txqueue);
+		return;
+	}
+
+	/* now that we have an index, find the tx_ring struct */
+	for (i = 0; i < vsi->num_txq; i++)
+		if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
+			if (txqueue == vsi->tx_rings[i]->q_index) {
+				tx_ring = vsi->tx_rings[i];
+				break;
+			}
+
+	/* Reset recovery level if enough time has elapsed after last timeout.
+	 * Also ensure no new reset action happens before next timeout period.
+	 */
+	if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ * 20)))
+		pf->tx_timeout_recovery_level = 1;
+	else if (time_before(jiffies, (pf->tx_timeout_last_recovery +
+				       netdev->watchdog_timeo)))
+		return;
+
+	if (tx_ring) {
+		struct ice_hw *hw = &pf->hw;
+		u32 head, val = 0;
+
+		head = (rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue])) &
+			QTX_COMM_HEAD_HEAD_M) >> QTX_COMM_HEAD_HEAD_S;
+		/* Read interrupt register */
+		val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx));
+
+		netdev_info(netdev, "tx_timeout: VSI_num: %d, Q %u, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n",
+			    vsi->vsi_num, txqueue, tx_ring->next_to_clean,
+			    head, tx_ring->next_to_use, val);
+	}
+
+	pf->tx_timeout_last_recovery = jiffies;
+	netdev_info(netdev, "tx_timeout recovery level %d, txqueue %u\n",
+		    pf->tx_timeout_recovery_level, txqueue);
+
+	switch (pf->tx_timeout_recovery_level) {
+	case 1:
+		set_bit(__ICE_PFR_REQ, pf->state);
+		break;
+	case 2:
+		set_bit(__ICE_CORER_REQ, pf->state);
+		break;
+	case 3:
+		set_bit(__ICE_GLOBR_REQ, pf->state);
+		break;
+	default:
+		netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n");
+		set_bit(__ICE_DOWN, pf->state);
+		set_bit(__ICE_NEEDS_RESTART, vsi->state);
+		set_bit(__ICE_SERVICE_DIS, pf->state);
+		break;
+	}
+
+	ice_service_task_schedule(pf);
+	pf->tx_timeout_recovery_level++;
+}
+
+/**
+ * ice_open - Called when a network interface becomes active
+ * @netdev: network interface device structure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP). At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the netdev watchdog is enabled,
+ * and the stack is notified that the interface is ready.
+ *
+ * Returns 0 on success, negative value on failure
+ */
+int ice_open(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	if (ice_is_reset_in_progress(pf->state)) {
+		netdev_err(netdev, "can't open net device while reset is in progress");
+		return -EBUSY;
+	}
+
+	return ice_open_internal(netdev);
+}
+
+/**
+ * ice_open_internal - Called when a network interface becomes active
+ * @netdev: network interface device structure
+ *
+ * Internal ice_open implementation. Should not be used directly except for ice_open and reset
+ * handling routine
+ *
+ * Returns 0 on success, negative value on failure
+ */
+int ice_open_internal(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_port_info *pi;
+	int err;
+
+	if (test_bit(__ICE_NEEDS_RESTART, pf->state)) {
+		netdev_err(netdev, "driver needs to be unloaded and reloaded\n");
+		return -EIO;
+	}
+
+	if (test_bit(__ICE_DOWN, pf->state)) {
+		netdev_err(netdev, "device is not ready yet\n");
+		return -EBUSY;
+	}
+
+	netif_carrier_off(netdev);
+
+	pi = vsi->port_info;
+	err = ice_update_link_info(pi);
+	if (err) {
+		netdev_err(netdev, "Failed to get link info, error %d\n",
+			   err);
+		return err;
+	}
+
+	/* Set PHY if there is media, otherwise, turn off PHY */
+	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
+		clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+		if (!test_bit(__ICE_PHY_INIT_COMPLETE, pf->state)) {
+			err = ice_init_phy_user_cfg(pi);
+			if (err) {
+				netdev_err(netdev, "Failed to initialize PHY settings, error %d\n",
+					   err);
+				return err;
+			}
+		}
+
+		err = ice_configure_phy(vsi);
+		if (err) {
+			netdev_err(netdev, "Failed to set physical link up, error %d\n",
+				   err);
+			return err;
+		}
+	} else {
+		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+		err = ice_aq_set_link_restart_an(pi, false, NULL);
+		if (err) {
+			netdev_err(netdev, "Failed to set PHY state, VSI %d error %d\n",
+				   vsi->vsi_num, err);
+			return err;
+		}
+	}
+
+	err = ice_vsi_open(vsi);
+	if (err)
+		netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n",
+			   vsi->vsi_num, vsi->vsw->sw_id);
+
+	/* Update existing tunnels information */
+	udp_tunnel_get_rx_info(netdev);
+
+	return err;
+}
+
+/**
+ * ice_stop - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * The stop entry point is called when an interface is de-activated by the OS,
+ * and the netdevice enters the DOWN state. The hardware is still under the
+ * driver's control, but the netdev interface is disabled.
+ *
+ * Returns success only - not allowed to fail
+ */
+int ice_stop(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+
+	if (ice_is_reset_in_progress(pf->state)) {
+		netdev_err(netdev, "can't stop net device while reset is in progress");
+		return -EBUSY;
+	}
+
+	ice_vsi_close(vsi);
+
+	return 0;
+}
+
+/**
+ * ice_features_check - Validate encapsulated packet conforms to limits
+ * @skb: skb buffer
+ * @netdev: This port's netdev
+ * @features: Offload features that the stack believes apply
+ */
+static netdev_features_t
+ice_features_check(struct sk_buff *skb,
+		   struct net_device __always_unused *netdev,
+		   netdev_features_t features)
+{
+	bool gso = skb_is_gso(skb);
+	size_t len;
+
+	/* No point in doing any of this if neither checksum nor GSO are
+	 * being requested for this frame. We can rule out both by just
+	 * checking for CHECKSUM_PARTIAL
+	 */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return features;
+
+	/* We cannot support GSO if the MSS is going to be less than
+	 * 64 bytes. If it is then we need to drop support for GSO.
+	 */
+	if (gso && (skb_shinfo(skb)->gso_size < ICE_TXD_CTX_MIN_MSS))
+		features &= ~NETIF_F_GSO_MASK;
+
+	len = skb_network_offset(skb);
+	if (len > ICE_TXD_MACLEN_MAX || len & 0x1)
+		goto out_rm_features;
+
+	len = skb_network_header_len(skb);
+	if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
+		goto out_rm_features;
+
+	if (skb->encapsulation) {
+		/* this must work for VXLAN frames AND IPIP/SIT frames, and in
+		 * the case of IPIP frames, the transport header pointer is
+		 * after the inner header! So check to make sure that this
+		 * is a GRE or UDP_TUNNEL frame before doing that math.
+		 */
+		if (gso && (skb_shinfo(skb)->gso_type &
+			    (SKB_GSO_GRE | SKB_GSO_UDP_TUNNEL))) {
+			len = skb_inner_network_header(skb) -
+			      skb_transport_header(skb);
+			if (len > ICE_TXD_L4LEN_MAX || len & 0x1)
+				goto out_rm_features;
+		}
+
+		len = skb_inner_network_header_len(skb);
+		if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
+			goto out_rm_features;
+	}
+
+	return features;
+out_rm_features:
+	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
+static const struct net_device_ops ice_netdev_safe_mode_ops = {
+	.ndo_open = ice_open,
+	.ndo_stop = ice_stop,
+	.ndo_start_xmit = ice_start_xmit,
+	.ndo_set_mac_address = ice_set_mac_address,
+	.ndo_validate_addr = eth_validate_addr,
+	.ndo_change_mtu = ice_change_mtu,
+	.ndo_get_stats64 = ice_get_stats64,
+	.ndo_tx_timeout = ice_tx_timeout,
+	.ndo_bpf = ice_xdp_safe_mode,
+};
+
+static const struct net_device_ops ice_netdev_ops = {
+	.ndo_open = ice_open,
+	.ndo_stop = ice_stop,
+	.ndo_start_xmit = ice_start_xmit,
+	.ndo_features_check = ice_features_check,
+	.ndo_set_rx_mode = ice_set_rx_mode,
+	.ndo_set_mac_address = ice_set_mac_address,
+	.ndo_validate_addr = eth_validate_addr,
+	.ndo_change_mtu = ice_change_mtu,
+	.ndo_get_stats64 = ice_get_stats64,
+	.ndo_set_tx_maxrate = ice_set_tx_maxrate,
+	.ndo_set_vf_spoofchk = ice_set_vf_spoofchk,
+	.ndo_set_vf_mac = ice_set_vf_mac,
+	.ndo_get_vf_config = ice_get_vf_cfg,
+	.ndo_set_vf_trust = ice_set_vf_trust,
+	.ndo_set_vf_vlan = ice_set_vf_port_vlan,
+	.ndo_set_vf_link_state = ice_set_vf_link_state,
+	.ndo_get_vf_stats = ice_get_vf_stats,
+	.ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
+	.ndo_set_features = ice_set_features,
+	.ndo_bridge_getlink = ice_bridge_getlink,
+	.ndo_bridge_setlink = ice_bridge_setlink,
+	.ndo_fdb_add = ice_fdb_add,
+	.ndo_fdb_del = ice_fdb_del,
+#ifdef CONFIG_RFS_ACCEL
+	.ndo_rx_flow_steer = ice_rx_flow_steer,
+#endif
+	.ndo_tx_timeout = ice_tx_timeout,
+	.ndo_bpf = ice_xdp,
+	.ndo_xdp_xmit = ice_xdp_xmit,
+	.ndo_xsk_wakeup = ice_xsk_wakeup,
+	.ndo_udp_tunnel_add = udp_tunnel_nic_add_port,
+	.ndo_udp_tunnel_del = udp_tunnel_nic_del_port,
+};
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
new file mode 100644
index 000000000..5903a3676
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -0,0 +1,822 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_common.h"
+
+/**
+ * ice_aq_read_nvm
+ * @hw: pointer to the HW struct
+ * @module_typeid: module pointer location in words from the NVM beginning
+ * @offset: byte offset from the module beginning
+ * @length: length of the section to be read (in bytes from the offset)
+ * @data: command buffer (size [bytes] = length)
+ * @last_command: tells if this is the last command in a series
+ * @read_shadow_ram: tell if this is a shadow RAM read
+ * @cd: pointer to command details structure or NULL
+ *
+ * Read the NVM using the admin queue commands (0x0701)
+ */
+static enum ice_status
+ice_aq_read_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset, u16 length,
+		void *data, bool last_command, bool read_shadow_ram,
+		struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+	struct ice_aqc_nvm *cmd;
+
+	cmd = &desc.params.nvm;
+
+	if (offset > ICE_AQC_NVM_MAX_OFFSET)
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_read);
+
+	if (!read_shadow_ram && module_typeid == ICE_AQC_NVM_START_POINT)
+		cmd->cmd_flags |= ICE_AQC_NVM_FLASH_ONLY;
+
+	/* If this is the last command in a series, set the proper flag. */
+	if (last_command)
+		cmd->cmd_flags |= ICE_AQC_NVM_LAST_CMD;
+	cmd->module_typeid = cpu_to_le16(module_typeid);
+	cmd->offset_low = cpu_to_le16(offset & 0xFFFF);
+	cmd->offset_high = (offset >> 16) & 0xFF;
+	cmd->length = cpu_to_le16(length);
+
+	return ice_aq_send_cmd(hw, &desc, data, length, cd);
+}
+
+/**
+ * ice_read_flat_nvm - Read portion of NVM by flat offset
+ * @hw: pointer to the HW struct
+ * @offset: offset from beginning of NVM
+ * @length: (in) number of bytes to read; (out) number of bytes actually read
+ * @data: buffer to return data in (sized to fit the specified length)
+ * @read_shadow_ram: if true, read from shadow RAM instead of NVM
+ *
+ * Reads a portion of the NVM, as a flat memory space. This function correctly
+ * breaks read requests across Shadow RAM sectors and ensures that no single
+ * read request exceeds the maximum 4Kb read for a single AdminQ command.
+ *
+ * Returns a status code on failure. Note that the data pointer may be
+ * partially updated if some reads succeed before a failure.
+ */
+enum ice_status
+ice_read_flat_nvm(struct ice_hw *hw, u32 offset, u32 *length, u8 *data,
+		  bool read_shadow_ram)
+{
+	enum ice_status status;
+	u32 inlen = *length;
+	u32 bytes_read = 0;
+	bool last_cmd;
+
+	*length = 0;
+
+	/* Verify the length of the read if this is for the Shadow RAM */
+	if (read_shadow_ram && ((offset + inlen) > (hw->nvm.sr_words * 2u))) {
+		ice_debug(hw, ICE_DBG_NVM,
+			  "NVM error: requested offset is beyond Shadow RAM limit\n");
+		return ICE_ERR_PARAM;
+	}
+
+	do {
+		u32 read_size, sector_offset;
+
+		/* ice_aq_read_nvm cannot read more than 4Kb at a time.
+		 * Additionally, a read from the Shadow RAM may not cross over
+		 * a sector boundary. Conveniently, the sector size is also
+		 * 4Kb.
+		 */
+		sector_offset = offset % ICE_AQ_MAX_BUF_LEN;
+		read_size = min_t(u32, ICE_AQ_MAX_BUF_LEN - sector_offset,
+				  inlen - bytes_read);
+
+		last_cmd = !(bytes_read + read_size < inlen);
+
+		status = ice_aq_read_nvm(hw, ICE_AQC_NVM_START_POINT,
+					 offset, read_size,
+					 data + bytes_read, last_cmd,
+					 read_shadow_ram, NULL);
+		if (status)
+			break;
+
+		bytes_read += read_size;
+		offset += read_size;
+	} while (!last_cmd);
+
+	*length = bytes_read;
+	return status;
+}
+
+/**
+ * ice_aq_update_nvm
+ * @hw: pointer to the HW struct
+ * @module_typeid: module pointer location in words from the NVM beginning
+ * @offset: byte offset from the module beginning
+ * @length: length of the section to be written (in bytes from the offset)
+ * @data: command buffer (size [bytes] = length)
+ * @last_command: tells if this is the last command in a series
+ * @command_flags: command parameters
+ * @cd: pointer to command details structure or NULL
+ *
+ * Update the NVM using the admin queue commands (0x0703)
+ */
+enum ice_status
+ice_aq_update_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset,
+		  u16 length, void *data, bool last_command, u8 command_flags,
+		  struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+	struct ice_aqc_nvm *cmd;
+
+	cmd = &desc.params.nvm;
+
+	/* In offset the highest byte must be zeroed. */
+	if (offset & 0xFF000000)
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_write);
+
+	cmd->cmd_flags |= command_flags;
+
+	/* If this is the last command in a series, set the proper flag. */
+	if (last_command)
+		cmd->cmd_flags |= ICE_AQC_NVM_LAST_CMD;
+	cmd->module_typeid = cpu_to_le16(module_typeid);
+	cmd->offset_low = cpu_to_le16(offset & 0xFFFF);
+	cmd->offset_high = (offset >> 16) & 0xFF;
+	cmd->length = cpu_to_le16(length);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	return ice_aq_send_cmd(hw, &desc, data, length, cd);
+}
+
+/**
+ * ice_aq_erase_nvm
+ * @hw: pointer to the HW struct
+ * @module_typeid: module pointer location in words from the NVM beginning
+ * @cd: pointer to command details structure or NULL
+ *
+ * Erase the NVM sector using the admin queue commands (0x0702)
+ */
+enum ice_status
+ice_aq_erase_nvm(struct ice_hw *hw, u16 module_typeid, struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+	struct ice_aqc_nvm *cmd;
+
+	cmd = &desc.params.nvm;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_erase);
+
+	cmd->module_typeid = cpu_to_le16(module_typeid);
+	cmd->length = cpu_to_le16(ICE_AQC_NVM_ERASE_LEN);
+	cmd->offset_low = 0;
+	cmd->offset_high = 0;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_read_sr_word_aq - Reads Shadow RAM via AQ
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16 bit word from the Shadow RAM using ice_read_flat_nvm.
+ */
+static enum ice_status
+ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data)
+{
+	u32 bytes = sizeof(u16);
+	enum ice_status status;
+	__le16 data_local;
+
+	/* Note that ice_read_flat_nvm takes into account the 4Kb AdminQ and
+	 * Shadow RAM sector restrictions necessary when reading from the NVM.
+	 */
+	status = ice_read_flat_nvm(hw, offset * sizeof(u16), &bytes,
+				   (u8 *)&data_local, true);
+	if (status)
+		return status;
+
+	*data = le16_to_cpu(data_local);
+	return 0;
+}
+
+/**
+ * ice_acquire_nvm - Generic request for acquiring the NVM ownership
+ * @hw: pointer to the HW structure
+ * @access: NVM access type (read or write)
+ *
+ * This function will request NVM ownership.
+ */
+enum ice_status
+ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access)
+{
+	if (hw->nvm.blank_nvm_mode)
+		return 0;
+
+	return ice_acquire_res(hw, ICE_NVM_RES_ID, access, ICE_NVM_TIMEOUT);
+}
+
+/**
+ * ice_release_nvm - Generic request for releasing the NVM ownership
+ * @hw: pointer to the HW structure
+ *
+ * This function will release NVM ownership.
+ */
+void ice_release_nvm(struct ice_hw *hw)
+{
+	if (hw->nvm.blank_nvm_mode)
+		return;
+
+	ice_release_res(hw, ICE_NVM_RES_ID);
+}
+
+/**
+ * ice_read_sr_word - Reads Shadow RAM word and acquire NVM if necessary
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16 bit word from the Shadow RAM using the ice_read_sr_word_aq.
+ */
+enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
+{
+	enum ice_status status;
+
+	status = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (!status) {
+		status = ice_read_sr_word_aq(hw, offset, data);
+		ice_release_nvm(hw);
+	}
+
+	return status;
+}
+
+/**
+ * ice_get_pfa_module_tlv - Reads sub module TLV from NVM PFA
+ * @hw: pointer to hardware structure
+ * @module_tlv: pointer to module TLV to return
+ * @module_tlv_len: pointer to module TLV length to return
+ * @module_type: module type requested
+ *
+ * Finds the requested sub module TLV type from the Preserved Field
+ * Area (PFA) and returns the TLV pointer and length. The caller can
+ * use these to read the variable length TLV value.
+ */
+enum ice_status
+ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
+		       u16 module_type)
+{
+	enum ice_status status;
+	u16 pfa_len, pfa_ptr;
+	u16 next_tlv;
+
+	status = ice_read_sr_word(hw, ICE_SR_PFA_PTR, &pfa_ptr);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Preserved Field Array pointer.\n");
+		return status;
+	}
+	status = ice_read_sr_word(hw, pfa_ptr, &pfa_len);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read PFA length.\n");
+		return status;
+	}
+	/* Starting with first TLV after PFA length, iterate through the list
+	 * of TLVs to find the requested one.
+	 */
+	next_tlv = pfa_ptr + 1;
+	while (next_tlv < pfa_ptr + pfa_len) {
+		u16 tlv_sub_module_type;
+		u16 tlv_len;
+
+		/* Read TLV type */
+		status = ice_read_sr_word(hw, next_tlv, &tlv_sub_module_type);
+		if (status) {
+			ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV type.\n");
+			break;
+		}
+		/* Read TLV length */
+		status = ice_read_sr_word(hw, next_tlv + 1, &tlv_len);
+		if (status) {
+			ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV length.\n");
+			break;
+		}
+		if (tlv_sub_module_type == module_type) {
+			if (tlv_len) {
+				*module_tlv = next_tlv;
+				*module_tlv_len = tlv_len;
+				return 0;
+			}
+			return ICE_ERR_INVAL_SIZE;
+		}
+		/* Check next TLV, i.e. current TLV pointer + length + 2 words
+		 * (for current TLV's type and length)
+		 */
+		next_tlv = next_tlv + tlv_len + 2;
+	}
+	/* Module does not exist */
+	return ICE_ERR_DOES_NOT_EXIST;
+}
+
+/**
+ * ice_read_pba_string - Reads part number string from NVM
+ * @hw: pointer to hardware structure
+ * @pba_num: stores the part number string from the NVM
+ * @pba_num_size: part number string buffer length
+ *
+ * Reads the part number string from the NVM.
+ */
+enum ice_status
+ice_read_pba_string(struct ice_hw *hw, u8 *pba_num, u32 pba_num_size)
+{
+	u16 pba_tlv, pba_tlv_len;
+	enum ice_status status;
+	u16 pba_word, pba_size;
+	u16 i;
+
+	status = ice_get_pfa_module_tlv(hw, &pba_tlv, &pba_tlv_len,
+					ICE_SR_PBA_BLOCK_PTR);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read PBA Block TLV.\n");
+		return status;
+	}
+
+	/* pba_size is the next word */
+	status = ice_read_sr_word(hw, (pba_tlv + 2), &pba_size);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read PBA Section size.\n");
+		return status;
+	}
+
+	if (pba_tlv_len < pba_size) {
+		ice_debug(hw, ICE_DBG_INIT, "Invalid PBA Block TLV size.\n");
+		return ICE_ERR_INVAL_SIZE;
+	}
+
+	/* Subtract one to get PBA word count (PBA Size word is included in
+	 * total size)
+	 */
+	pba_size--;
+	if (pba_num_size < (((u32)pba_size * 2) + 1)) {
+		ice_debug(hw, ICE_DBG_INIT, "Buffer too small for PBA data.\n");
+		return ICE_ERR_PARAM;
+	}
+
+	for (i = 0; i < pba_size; i++) {
+		status = ice_read_sr_word(hw, (pba_tlv + 2 + 1) + i, &pba_word);
+		if (status) {
+			ice_debug(hw, ICE_DBG_INIT, "Failed to read PBA Block word %d.\n", i);
+			return status;
+		}
+
+		pba_num[(i * 2)] = (pba_word >> 8) & 0xFF;
+		pba_num[(i * 2) + 1] = pba_word & 0xFF;
+	}
+	pba_num[(pba_size * 2)] = '\0';
+
+	return status;
+}
+
+/**
+ * ice_get_orom_ver_info - Read Option ROM version information
+ * @hw: pointer to the HW struct
+ *
+ * Read the Combo Image version data from the Boot Configuration TLV and fill
+ * in the option ROM version data.
+ */
+static enum ice_status ice_get_orom_ver_info(struct ice_hw *hw)
+{
+	u16 combo_hi, combo_lo, boot_cfg_tlv, boot_cfg_tlv_len;
+	struct ice_orom_info *orom = &hw->nvm.orom;
+	enum ice_status status;
+	u32 combo_ver;
+
+	status = ice_get_pfa_module_tlv(hw, &boot_cfg_tlv, &boot_cfg_tlv_len,
+					ICE_SR_BOOT_CFG_PTR);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Failed to read Boot Configuration Block TLV.\n");
+		return status;
+	}
+
+	/* Boot Configuration Block must have length at least 2 words
+	 * (Combo Image Version High and Combo Image Version Low)
+	 */
+	if (boot_cfg_tlv_len < 2) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Invalid Boot Configuration Block TLV size.\n");
+		return ICE_ERR_INVAL_SIZE;
+	}
+
+	status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OROM_VER_OFF),
+				  &combo_hi);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read OROM_VER hi.\n");
+		return status;
+	}
+
+	status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OROM_VER_OFF + 1),
+				  &combo_lo);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read OROM_VER lo.\n");
+		return status;
+	}
+
+	combo_ver = ((u32)combo_hi << 16) | combo_lo;
+
+	orom->major = (u8)((combo_ver & ICE_OROM_VER_MASK) >>
+			   ICE_OROM_VER_SHIFT);
+	orom->patch = (u8)(combo_ver & ICE_OROM_VER_PATCH_MASK);
+	orom->build = (u16)((combo_ver & ICE_OROM_VER_BUILD_MASK) >>
+			    ICE_OROM_VER_BUILD_SHIFT);
+
+	return 0;
+}
+
+/**
+ * ice_get_netlist_ver_info
+ * @hw: pointer to the HW struct
+ *
+ * Get the netlist version information
+ */
+static enum ice_status ice_get_netlist_ver_info(struct ice_hw *hw)
+{
+	struct ice_netlist_ver_info *ver = &hw->netlist_ver;
+	enum ice_status ret;
+	u32 id_blk_start;
+	__le16 raw_data;
+	u16 data, i;
+	u16 *buff;
+
+	ret = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (ret)
+		return ret;
+	buff = kcalloc(ICE_AQC_NVM_NETLIST_ID_BLK_LEN, sizeof(*buff),
+		       GFP_KERNEL);
+	if (!buff) {
+		ret = ICE_ERR_NO_MEMORY;
+		goto exit_no_mem;
+	}
+
+	/* read module length */
+	ret = ice_aq_read_nvm(hw, ICE_AQC_NVM_LINK_TOPO_NETLIST_MOD_ID,
+			      ICE_AQC_NVM_LINK_TOPO_NETLIST_LEN_OFFSET * 2,
+			      ICE_AQC_NVM_LINK_TOPO_NETLIST_LEN, &raw_data,
+			      false, false, NULL);
+	if (ret)
+		goto exit_error;
+
+	data = le16_to_cpu(raw_data);
+	/* exit if length is = 0 */
+	if (!data)
+		goto exit_error;
+
+	/* read node count */
+	ret = ice_aq_read_nvm(hw, ICE_AQC_NVM_LINK_TOPO_NETLIST_MOD_ID,
+			      ICE_AQC_NVM_NETLIST_NODE_COUNT_OFFSET * 2,
+			      ICE_AQC_NVM_NETLIST_NODE_COUNT_LEN, &raw_data,
+			      false, false, NULL);
+	if (ret)
+		goto exit_error;
+	data = le16_to_cpu(raw_data) & ICE_AQC_NVM_NETLIST_NODE_COUNT_M;
+
+	/* netlist ID block starts from offset 4 + node count * 2 */
+	id_blk_start = ICE_AQC_NVM_NETLIST_ID_BLK_START_OFFSET + data * 2;
+
+	/* read the entire netlist ID block */
+	ret = ice_aq_read_nvm(hw, ICE_AQC_NVM_LINK_TOPO_NETLIST_MOD_ID,
+			      id_blk_start * 2,
+			      ICE_AQC_NVM_NETLIST_ID_BLK_LEN * 2, buff, false,
+			      false, NULL);
+	if (ret)
+		goto exit_error;
+
+	for (i = 0; i < ICE_AQC_NVM_NETLIST_ID_BLK_LEN; i++)
+		buff[i] = le16_to_cpu(((__force __le16 *)buff)[i]);
+
+	ver->major = (buff[ICE_AQC_NVM_NETLIST_ID_BLK_MAJOR_VER_HIGH] << 16) |
+		buff[ICE_AQC_NVM_NETLIST_ID_BLK_MAJOR_VER_LOW];
+	ver->minor = (buff[ICE_AQC_NVM_NETLIST_ID_BLK_MINOR_VER_HIGH] << 16) |
+		buff[ICE_AQC_NVM_NETLIST_ID_BLK_MINOR_VER_LOW];
+	ver->type = (buff[ICE_AQC_NVM_NETLIST_ID_BLK_TYPE_HIGH] << 16) |
+		buff[ICE_AQC_NVM_NETLIST_ID_BLK_TYPE_LOW];
+	ver->rev = (buff[ICE_AQC_NVM_NETLIST_ID_BLK_REV_HIGH] << 16) |
+		buff[ICE_AQC_NVM_NETLIST_ID_BLK_REV_LOW];
+	ver->cust_ver = buff[ICE_AQC_NVM_NETLIST_ID_BLK_CUST_VER];
+	/* Read the left most 4 bytes of SHA */
+	ver->hash = buff[ICE_AQC_NVM_NETLIST_ID_BLK_SHA_HASH + 15] << 16 |
+		buff[ICE_AQC_NVM_NETLIST_ID_BLK_SHA_HASH + 14];
+
+exit_error:
+	kfree(buff);
+exit_no_mem:
+	ice_release_nvm(hw);
+	return ret;
+}
+
+/**
+ * ice_discover_flash_size - Discover the available flash size.
+ * @hw: pointer to the HW struct
+ *
+ * The device flash could be up to 16MB in size. However, it is possible that
+ * the actual size is smaller. Use bisection to determine the accessible size
+ * of flash memory.
+ */
+static enum ice_status ice_discover_flash_size(struct ice_hw *hw)
+{
+	u32 min_size = 0, max_size = ICE_AQC_NVM_MAX_OFFSET + 1;
+	enum ice_status status;
+
+	status = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (status)
+		return status;
+
+	while ((max_size - min_size) > 1) {
+		u32 offset = (max_size + min_size) / 2;
+		u32 len = 1;
+		u8 data;
+
+		status = ice_read_flat_nvm(hw, offset, &len, &data, false);
+		if (status == ICE_ERR_AQ_ERROR &&
+		    hw->adminq.sq_last_status == ICE_AQ_RC_EINVAL) {
+			ice_debug(hw, ICE_DBG_NVM,
+				  "%s: New upper bound of %u bytes\n",
+				  __func__, offset);
+			status = 0;
+			max_size = offset;
+		} else if (!status) {
+			ice_debug(hw, ICE_DBG_NVM,
+				  "%s: New lower bound of %u bytes\n",
+				  __func__, offset);
+			min_size = offset;
+		} else {
+			/* an unexpected error occurred */
+			goto err_read_flat_nvm;
+		}
+	}
+
+	ice_debug(hw, ICE_DBG_NVM,
+		  "Predicted flash size is %u bytes\n", max_size);
+
+	hw->nvm.flash_size = max_size;
+
+err_read_flat_nvm:
+	ice_release_nvm(hw);
+
+	return status;
+}
+
+/**
+ * ice_init_nvm - initializes NVM setting
+ * @hw: pointer to the HW struct
+ *
+ * This function reads and populates NVM settings such as Shadow RAM size,
+ * max_timeout, and blank_nvm_mode
+ */
+enum ice_status ice_init_nvm(struct ice_hw *hw)
+{
+	struct ice_nvm_info *nvm = &hw->nvm;
+	u16 eetrack_lo, eetrack_hi, ver;
+	enum ice_status status;
+	u32 fla, gens_stat;
+	u8 sr_size;
+
+	/* The SR size is stored regardless of the NVM programming mode
+	 * as the blank mode may be used in the factory line.
+	 */
+	gens_stat = rd32(hw, GLNVM_GENS);
+	sr_size = (gens_stat & GLNVM_GENS_SR_SIZE_M) >> GLNVM_GENS_SR_SIZE_S;
+
+	/* Switching to words (sr_size contains power of 2) */
+	nvm->sr_words = BIT(sr_size) * ICE_SR_WORDS_IN_1KB;
+
+	/* Check if we are in the normal or blank NVM programming mode */
+	fla = rd32(hw, GLNVM_FLA);
+	if (fla & GLNVM_FLA_LOCKED_M) { /* Normal programming mode */
+		nvm->blank_nvm_mode = false;
+	} else {
+		/* Blank programming mode */
+		nvm->blank_nvm_mode = true;
+		ice_debug(hw, ICE_DBG_NVM,
+			  "NVM init error: unsupported blank mode.\n");
+		return ICE_ERR_NVM_BLANK_MODE;
+	}
+
+	status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &ver);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Failed to read DEV starter version.\n");
+		return status;
+	}
+	nvm->major_ver = (ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT;
+	nvm->minor_ver = (ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT;
+
+	status = ice_read_sr_word(hw, ICE_SR_NVM_EETRACK_LO, &eetrack_lo);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read EETRACK lo.\n");
+		return status;
+	}
+	status = ice_read_sr_word(hw, ICE_SR_NVM_EETRACK_HI, &eetrack_hi);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read EETRACK hi.\n");
+		return status;
+	}
+
+	nvm->eetrack = (eetrack_hi << 16) | eetrack_lo;
+
+	status = ice_discover_flash_size(hw);
+	if (status) {
+		ice_debug(hw, ICE_DBG_NVM,
+			  "NVM init error: failed to discover flash size.\n");
+		return status;
+	}
+
+	switch (hw->device_id) {
+	/* the following devices do not have boot_cfg_tlv yet */
+	case ICE_DEV_ID_E823C_BACKPLANE:
+	case ICE_DEV_ID_E823C_QSFP:
+	case ICE_DEV_ID_E823C_SFP:
+	case ICE_DEV_ID_E823C_10G_BASE_T:
+	case ICE_DEV_ID_E823C_SGMII:
+	case ICE_DEV_ID_E822C_BACKPLANE:
+	case ICE_DEV_ID_E822C_QSFP:
+	case ICE_DEV_ID_E822C_10G_BASE_T:
+	case ICE_DEV_ID_E822C_SGMII:
+	case ICE_DEV_ID_E822C_SFP:
+	case ICE_DEV_ID_E822L_BACKPLANE:
+	case ICE_DEV_ID_E822L_SFP:
+	case ICE_DEV_ID_E822L_10G_BASE_T:
+	case ICE_DEV_ID_E822L_SGMII:
+	case ICE_DEV_ID_E823L_BACKPLANE:
+	case ICE_DEV_ID_E823L_SFP:
+	case ICE_DEV_ID_E823L_10G_BASE_T:
+	case ICE_DEV_ID_E823L_1GBE:
+	case ICE_DEV_ID_E823L_QSFP:
+		return status;
+	default:
+		break;
+	}
+
+	status = ice_get_orom_ver_info(hw);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read Option ROM info.\n");
+		return status;
+	}
+
+	/* read the netlist version information */
+	status = ice_get_netlist_ver_info(hw);
+	if (status)
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read netlist info.\n");
+
+	return 0;
+}
+
+/**
+ * ice_nvm_validate_checksum
+ * @hw: pointer to the HW struct
+ *
+ * Verify NVM PFA checksum validity (0x0706)
+ */
+enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw)
+{
+	struct ice_aqc_nvm_checksum *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	status = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (status)
+		return status;
+
+	cmd = &desc.params.nvm_checksum;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_checksum);
+	cmd->flags = ICE_AQC_NVM_CHECKSUM_VERIFY;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+	ice_release_nvm(hw);
+
+	if (!status)
+		if (le16_to_cpu(cmd->checksum) != ICE_AQC_NVM_CHECKSUM_CORRECT)
+			status = ICE_ERR_NVM_CHECKSUM;
+
+	return status;
+}
+
+/**
+ * ice_nvm_write_activate
+ * @hw: pointer to the HW struct
+ * @cmd_flags: NVM activate admin command bits (banks to be validated)
+ *
+ * Update the control word with the required banks' validity bits
+ * and dumps the Shadow RAM to flash (0x0707)
+ */
+enum ice_status ice_nvm_write_activate(struct ice_hw *hw, u8 cmd_flags)
+{
+	struct ice_aqc_nvm *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.nvm;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_write_activate);
+
+	cmd->cmd_flags = cmd_flags;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_nvm_update_empr
+ * @hw: pointer to the HW struct
+ *
+ * Update empr (0x0709). This command allows SW to
+ * request an EMPR to activate new FW.
+ */
+enum ice_status ice_aq_nvm_update_empr(struct ice_hw *hw)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_update_empr);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/* ice_nvm_set_pkg_data
+ * @hw: pointer to the HW struct
+ * @del_pkg_data_flag: If is set then the current pkg_data store by FW
+ *		       is deleted.
+ *		       If bit is set to 1, then buffer should be size 0.
+ * @data: pointer to buffer
+ * @length: length of the buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set package data (0x070A). This command is equivalent to the reception
+ * of a PLDM FW Update GetPackageData cmd. This command should be sent
+ * as part of the NVM update as the first cmd in the flow.
+ */
+
+enum ice_status
+ice_nvm_set_pkg_data(struct ice_hw *hw, bool del_pkg_data_flag, u8 *data,
+		     u16 length, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_nvm_pkg_data *cmd;
+	struct ice_aq_desc desc;
+
+	if (length != 0 && !data)
+		return ICE_ERR_PARAM;
+
+	cmd = &desc.params.pkg_data;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_pkg_data);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	if (del_pkg_data_flag)
+		cmd->cmd_flags |= ICE_AQC_NVM_PKG_DELETE;
+
+	return ice_aq_send_cmd(hw, &desc, data, length, cd);
+}
+
+/* ice_nvm_pass_component_tbl
+ * @hw: pointer to the HW struct
+ * @data: pointer to buffer
+ * @length: length of the buffer
+ * @transfer_flag: parameter for determining stage of the update
+ * @comp_response: a pointer to the response from the 0x070B AQC.
+ * @comp_response_code: a pointer to the response code from the 0x070B AQC.
+ * @cd: pointer to command details structure or NULL
+ *
+ * Pass component table (0x070B). This command is equivalent to the reception
+ * of a PLDM FW Update PassComponentTable cmd. This command should be sent once
+ * per component. It can be only sent after Set Package Data cmd and before
+ * actual update. FW will assume these commands are going to be sent until
+ * the TransferFlag is set to End or StartAndEnd.
+ */
+
+enum ice_status
+ice_nvm_pass_component_tbl(struct ice_hw *hw, u8 *data, u16 length,
+			   u8 transfer_flag, u8 *comp_response,
+			   u8 *comp_response_code, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_nvm_pass_comp_tbl *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	if (!data || !comp_response || !comp_response_code)
+		return ICE_ERR_PARAM;
+
+	cmd = &desc.params.pass_comp_tbl;
+
+	ice_fill_dflt_direct_cmd_desc(&desc,
+				      ice_aqc_opc_nvm_pass_component_tbl);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	cmd->transfer_flag = transfer_flag;
+	status = ice_aq_send_cmd(hw, &desc, data, length, cd);
+
+	if (!status) {
+		*comp_response = cmd->component_response;
+		*comp_response_code = cmd->component_response_code;
+	}
+	return status;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h
new file mode 100644
index 000000000..8d430909f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_NVM_H_
+#define _ICE_NVM_H_
+
+enum ice_status
+ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access);
+void ice_release_nvm(struct ice_hw *hw);
+enum ice_status
+ice_read_flat_nvm(struct ice_hw *hw, u32 offset, u32 *length, u8 *data,
+		  bool read_shadow_ram);
+enum ice_status
+ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
+		       u16 module_type);
+enum ice_status
+ice_read_pba_string(struct ice_hw *hw, u8 *pba_num, u32 pba_num_size);
+enum ice_status ice_init_nvm(struct ice_hw *hw);
+enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data);
+enum ice_status
+ice_aq_update_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset,
+		  u16 length, void *data, bool last_command, u8 command_flags,
+		  struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_erase_nvm(struct ice_hw *hw, u16 module_typeid, struct ice_sq_cd *cd);
+enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw);
+enum ice_status ice_nvm_write_activate(struct ice_hw *hw, u8 cmd_flags);
+enum ice_status ice_aq_nvm_update_empr(struct ice_hw *hw);
+enum ice_status
+ice_nvm_set_pkg_data(struct ice_hw *hw, bool del_pkg_data_flag, u8 *data,
+		     u16 length, struct ice_sq_cd *cd);
+enum ice_status
+ice_nvm_pass_component_tbl(struct ice_hw *hw, u8 *data, u16 length,
+			   u8 transfer_flag, u8 *comp_response,
+			   u8 *comp_response_code, struct ice_sq_cd *cd);
+#endif /* _ICE_NVM_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_osdep.h b/drivers/net/ethernet/intel/ice/ice_osdep.h
new file mode 100644
index 000000000..f57c414bc
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_osdep.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_OSDEP_H_
+#define _ICE_OSDEP_H_
+
+#include <linux/types.h>
+#include <linux/io.h>
+#ifndef CONFIG_64BIT
+#include <linux/io-64-nonatomic-lo-hi.h>
+#endif
+
+#define wr32(a, reg, value)	writel((value), ((a)->hw_addr + (reg)))
+#define rd32(a, reg)		readl((a)->hw_addr + (reg))
+#define wr64(a, reg, value)	writeq((value), ((a)->hw_addr + (reg)))
+#define rd64(a, reg)		readq((a)->hw_addr + (reg))
+
+#define ice_flush(a)		rd32((a), GLGEN_STAT)
+#define ICE_M(m, s)		((m) << (s))
+
+struct ice_dma_mem {
+	void *va;
+	dma_addr_t pa;
+	size_t size;
+};
+
+#define ice_hw_to_dev(ptr)	\
+	(&(container_of((ptr), struct ice_pf, hw))->pdev->dev)
+
+#ifdef CONFIG_DYNAMIC_DEBUG
+#define ice_debug(hw, type, fmt, args...) \
+	dev_dbg(ice_hw_to_dev(hw), fmt, ##args)
+
+#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
+	print_hex_dump_debug(KBUILD_MODNAME " ",		\
+			     DUMP_PREFIX_OFFSET, rowsize,	\
+			     groupsize, buf, len, false)
+#else
+#define ice_debug(hw, type, fmt, args...)			\
+do {								\
+	if ((type) & (hw)->debug_mask)				\
+		dev_info(ice_hw_to_dev(hw), fmt, ##args);	\
+} while (0)
+
+#ifdef DEBUG
+#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
+do {								\
+	if ((type) & (hw)->debug_mask)				\
+		print_hex_dump_debug(KBUILD_MODNAME,		\
+				     DUMP_PREFIX_OFFSET,	\
+				     rowsize, groupsize, buf,	\
+				     len, false);		\
+} while (0)
+#else
+#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
+do {								\
+	struct ice_hw *hw_l = hw;				\
+	if ((type) & (hw_l)->debug_mask) {			\
+		u16 len_l = len;				\
+		u8 *buf_l = buf;				\
+		int i;						\
+		for (i = 0; i < (len_l - 16); i += 16)		\
+			ice_debug(hw_l, type, "0x%04X  %16ph\n",\
+				  i, ((buf_l) + i));		\
+		if (i < len_l)					\
+			ice_debug(hw_l, type, "0x%04X  %*ph\n", \
+				  i, ((len_l) - i), ((buf_l) + i));\
+	}							\
+} while (0)
+#endif /* DEBUG */
+#endif /* CONFIG_DYNAMIC_DEBUG */
+
+#endif /* _ICE_OSDEP_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
new file mode 100644
index 000000000..7f4c1ec1e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_PROTOCOL_TYPE_H_
+#define _ICE_PROTOCOL_TYPE_H_
+/* Decoders for ice_prot_id:
+ * - F: First
+ * - I: Inner
+ * - L: Last
+ * - O: Outer
+ * - S: Single
+ */
+enum ice_prot_id {
+	ICE_PROT_ID_INVAL	= 0,
+	ICE_PROT_MAC_OF_OR_S	= 1,
+	ICE_PROT_IPV4_OF_OR_S	= 32,
+	ICE_PROT_IPV4_IL	= 33,
+	ICE_PROT_IPV6_OF_OR_S	= 40,
+	ICE_PROT_IPV6_IL	= 41,
+	ICE_PROT_TCP_IL		= 49,
+	ICE_PROT_UDP_OF		= 52,
+	ICE_PROT_UDP_IL_OR_S	= 53,
+	ICE_PROT_GRE_OF		= 64,
+	ICE_PROT_SCTP_IL	= 96,
+	ICE_PROT_META_ID	= 255, /* when offset == metadata */
+	ICE_PROT_INVALID	= 255  /* when offset == ICE_FV_OFFSET_INVAL */
+};
+#endif /* _ICE_PROTOCOL_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
new file mode 100644
index 000000000..44a228530
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -0,0 +1,2968 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_sched.h"
+
+/**
+ * ice_sched_add_root_node - Insert the Tx scheduler root node in SW DB
+ * @pi: port information structure
+ * @info: Scheduler element information from firmware
+ *
+ * This function inserts the root node of the scheduling tree topology
+ * to the SW DB.
+ */
+static enum ice_status
+ice_sched_add_root_node(struct ice_port_info *pi,
+			struct ice_aqc_txsched_elem_data *info)
+{
+	struct ice_sched_node *root;
+	struct ice_hw *hw;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	hw = pi->hw;
+
+	root = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*root), GFP_KERNEL);
+	if (!root)
+		return ICE_ERR_NO_MEMORY;
+
+	/* coverity[suspicious_sizeof] */
+	root->children = devm_kcalloc(ice_hw_to_dev(hw), hw->max_children[0],
+				      sizeof(*root), GFP_KERNEL);
+	if (!root->children) {
+		devm_kfree(ice_hw_to_dev(hw), root);
+		return ICE_ERR_NO_MEMORY;
+	}
+
+	memcpy(&root->info, info, sizeof(*info));
+	pi->root = root;
+	return 0;
+}
+
+/**
+ * ice_sched_find_node_by_teid - Find the Tx scheduler node in SW DB
+ * @start_node: pointer to the starting ice_sched_node struct in a sub-tree
+ * @teid: node TEID to search
+ *
+ * This function searches for a node matching the TEID in the scheduling tree
+ * from the SW DB. The search is recursive and is restricted by the number of
+ * layers it has searched through; stopping at the max supported layer.
+ *
+ * This function needs to be called when holding the port_info->sched_lock
+ */
+struct ice_sched_node *
+ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid)
+{
+	u16 i;
+
+	/* The TEID is same as that of the start_node */
+	if (ICE_TXSCHED_GET_NODE_TEID(start_node) == teid)
+		return start_node;
+
+	/* The node has no children or is at the max layer */
+	if (!start_node->num_children ||
+	    start_node->tx_sched_layer >= ICE_AQC_TOPO_MAX_LEVEL_NUM ||
+	    start_node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF)
+		return NULL;
+
+	/* Check if TEID matches to any of the children nodes */
+	for (i = 0; i < start_node->num_children; i++)
+		if (ICE_TXSCHED_GET_NODE_TEID(start_node->children[i]) == teid)
+			return start_node->children[i];
+
+	/* Search within each child's sub-tree */
+	for (i = 0; i < start_node->num_children; i++) {
+		struct ice_sched_node *tmp;
+
+		tmp = ice_sched_find_node_by_teid(start_node->children[i],
+						  teid);
+		if (tmp)
+			return tmp;
+	}
+
+	return NULL;
+}
+
+/**
+ * ice_aqc_send_sched_elem_cmd - send scheduling elements cmd
+ * @hw: pointer to the HW struct
+ * @cmd_opc: cmd opcode
+ * @elems_req: number of elements to request
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_resp: returns total number of elements response
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function sends a scheduling elements cmd (cmd_opc)
+ */
+static enum ice_status
+ice_aqc_send_sched_elem_cmd(struct ice_hw *hw, enum ice_adminq_opc cmd_opc,
+			    u16 elems_req, void *buf, u16 buf_size,
+			    u16 *elems_resp, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_sched_elem_cmd *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.sched_elem_cmd;
+	ice_fill_dflt_direct_cmd_desc(&desc, cmd_opc);
+	cmd->num_elem_req = cpu_to_le16(elems_req);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status && elems_resp)
+		*elems_resp = le16_to_cpu(cmd->num_elem_resp);
+
+	return status;
+}
+
+/**
+ * ice_aq_query_sched_elems - query scheduler elements
+ * @hw: pointer to the HW struct
+ * @elems_req: number of elements to query
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_ret: returns total number of elements returned
+ * @cd: pointer to command details structure or NULL
+ *
+ * Query scheduling elements (0x0404)
+ */
+enum ice_status
+ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
+			 struct ice_aqc_txsched_elem_data *buf, u16 buf_size,
+			 u16 *elems_ret, struct ice_sq_cd *cd)
+{
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_get_sched_elems,
+					   elems_req, (void *)buf, buf_size,
+					   elems_ret, cd);
+}
+
+/**
+ * ice_sched_add_node - Insert the Tx scheduler node in SW DB
+ * @pi: port information structure
+ * @layer: Scheduler layer of the node
+ * @info: Scheduler element information from firmware
+ *
+ * This function inserts a scheduler node to the SW DB.
+ */
+enum ice_status
+ice_sched_add_node(struct ice_port_info *pi, u8 layer,
+		   struct ice_aqc_txsched_elem_data *info)
+{
+	struct ice_aqc_txsched_elem_data elem;
+	struct ice_sched_node *parent;
+	struct ice_sched_node *node;
+	enum ice_status status;
+	struct ice_hw *hw;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	hw = pi->hw;
+
+	/* A valid parent node should be there */
+	parent = ice_sched_find_node_by_teid(pi->root,
+					     le32_to_cpu(info->parent_teid));
+	if (!parent) {
+		ice_debug(hw, ICE_DBG_SCHED,
+			  "Parent Node not found for parent_teid=0x%x\n",
+			  le32_to_cpu(info->parent_teid));
+		return ICE_ERR_PARAM;
+	}
+
+	/* query the current node information from FW before adding it
+	 * to the SW DB
+	 */
+	status = ice_sched_query_elem(hw, le32_to_cpu(info->node_teid), &elem);
+	if (status)
+		return status;
+
+	node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return ICE_ERR_NO_MEMORY;
+	if (hw->max_children[layer]) {
+		/* coverity[suspicious_sizeof] */
+		node->children = devm_kcalloc(ice_hw_to_dev(hw),
+					      hw->max_children[layer],
+					      sizeof(*node), GFP_KERNEL);
+		if (!node->children) {
+			devm_kfree(ice_hw_to_dev(hw), node);
+			return ICE_ERR_NO_MEMORY;
+		}
+	}
+
+	node->in_use = true;
+	node->parent = parent;
+	node->tx_sched_layer = layer;
+	parent->children[parent->num_children++] = node;
+	node->info = elem;
+	return 0;
+}
+
+/**
+ * ice_aq_delete_sched_elems - delete scheduler elements
+ * @hw: pointer to the HW struct
+ * @grps_req: number of groups to delete
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @grps_del: returns total number of elements deleted
+ * @cd: pointer to command details structure or NULL
+ *
+ * Delete scheduling elements (0x040F)
+ */
+static enum ice_status
+ice_aq_delete_sched_elems(struct ice_hw *hw, u16 grps_req,
+			  struct ice_aqc_delete_elem *buf, u16 buf_size,
+			  u16 *grps_del, struct ice_sq_cd *cd)
+{
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_delete_sched_elems,
+					   grps_req, (void *)buf, buf_size,
+					   grps_del, cd);
+}
+
+/**
+ * ice_sched_remove_elems - remove nodes from HW
+ * @hw: pointer to the HW struct
+ * @parent: pointer to the parent node
+ * @num_nodes: number of nodes
+ * @node_teids: array of node teids to be deleted
+ *
+ * This function remove nodes from HW
+ */
+static enum ice_status
+ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent,
+		       u16 num_nodes, u32 *node_teids)
+{
+	struct ice_aqc_delete_elem *buf;
+	u16 i, num_groups_removed = 0;
+	enum ice_status status;
+	u16 buf_size;
+
+	buf_size = struct_size(buf, teid, num_nodes);
+	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
+	if (!buf)
+		return ICE_ERR_NO_MEMORY;
+
+	buf->hdr.parent_teid = parent->info.node_teid;
+	buf->hdr.num_elems = cpu_to_le16(num_nodes);
+	for (i = 0; i < num_nodes; i++)
+		buf->teid[i] = cpu_to_le32(node_teids[i]);
+
+	status = ice_aq_delete_sched_elems(hw, 1, buf, buf_size,
+					   &num_groups_removed, NULL);
+	if (status || num_groups_removed != 1)
+		ice_debug(hw, ICE_DBG_SCHED, "remove node failed FW error %d\n",
+			  hw->adminq.sq_last_status);
+
+	devm_kfree(ice_hw_to_dev(hw), buf);
+	return status;
+}
+
+/**
+ * ice_sched_get_first_node - get the first node of the given layer
+ * @pi: port information structure
+ * @parent: pointer the base node of the subtree
+ * @layer: layer number
+ *
+ * This function retrieves the first node of the given layer from the subtree
+ */
+static struct ice_sched_node *
+ice_sched_get_first_node(struct ice_port_info *pi,
+			 struct ice_sched_node *parent, u8 layer)
+{
+	return pi->sib_head[parent->tc_num][layer];
+}
+
+/**
+ * ice_sched_get_tc_node - get pointer to TC node
+ * @pi: port information structure
+ * @tc: TC number
+ *
+ * This function returns the TC node pointer
+ */
+struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc)
+{
+	u8 i;
+
+	if (!pi || !pi->root)
+		return NULL;
+	for (i = 0; i < pi->root->num_children; i++)
+		if (pi->root->children[i]->tc_num == tc)
+			return pi->root->children[i];
+	return NULL;
+}
+
+/**
+ * ice_free_sched_node - Free a Tx scheduler node from SW DB
+ * @pi: port information structure
+ * @node: pointer to the ice_sched_node struct
+ *
+ * This function frees up a node from SW DB as well as from HW
+ *
+ * This function needs to be called with the port_info->sched_lock held
+ */
+void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node)
+{
+	struct ice_sched_node *parent;
+	struct ice_hw *hw = pi->hw;
+	u8 i, j;
+
+	/* Free the children before freeing up the parent node
+	 * The parent array is updated below and that shifts the nodes
+	 * in the array. So always pick the first child if num children > 0
+	 */
+	while (node->num_children)
+		ice_free_sched_node(pi, node->children[0]);
+
+	/* Leaf, TC and root nodes can't be deleted by SW */
+	if (node->tx_sched_layer >= hw->sw_entry_point_layer &&
+	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
+	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT &&
+	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) {
+		u32 teid = le32_to_cpu(node->info.node_teid);
+
+		ice_sched_remove_elems(hw, node->parent, 1, &teid);
+	}
+	parent = node->parent;
+	/* root has no parent */
+	if (parent) {
+		struct ice_sched_node *p;
+
+		/* update the parent */
+		for (i = 0; i < parent->num_children; i++)
+			if (parent->children[i] == node) {
+				for (j = i + 1; j < parent->num_children; j++)
+					parent->children[j - 1] =
+						parent->children[j];
+				parent->num_children--;
+				break;
+			}
+
+		p = ice_sched_get_first_node(pi, node, node->tx_sched_layer);
+		while (p) {
+			if (p->sibling == node) {
+				p->sibling = node->sibling;
+				break;
+			}
+			p = p->sibling;
+		}
+
+		/* update the sibling head if head is getting removed */
+		if (pi->sib_head[node->tc_num][node->tx_sched_layer] == node)
+			pi->sib_head[node->tc_num][node->tx_sched_layer] =
+				node->sibling;
+	}
+
+	/* leaf nodes have no children */
+	if (node->children)
+		devm_kfree(ice_hw_to_dev(hw), node->children);
+	devm_kfree(ice_hw_to_dev(hw), node);
+}
+
+/**
+ * ice_aq_get_dflt_topo - gets default scheduler topology
+ * @hw: pointer to the HW struct
+ * @lport: logical port number
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_branches: returns total number of queue to port branches
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get default scheduler topology (0x400)
+ */
+static enum ice_status
+ice_aq_get_dflt_topo(struct ice_hw *hw, u8 lport,
+		     struct ice_aqc_get_topo_elem *buf, u16 buf_size,
+		     u8 *num_branches, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_get_topo *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.get_topo;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_dflt_topo);
+	cmd->port_num = lport;
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status && num_branches)
+		*num_branches = cmd->num_branches;
+
+	return status;
+}
+
+/**
+ * ice_aq_add_sched_elems - adds scheduling element
+ * @hw: pointer to the HW struct
+ * @grps_req: the number of groups that are requested to be added
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @grps_added: returns total number of groups added
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add scheduling elements (0x0401)
+ */
+static enum ice_status
+ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req,
+		       struct ice_aqc_add_elem *buf, u16 buf_size,
+		       u16 *grps_added, struct ice_sq_cd *cd)
+{
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_add_sched_elems,
+					   grps_req, (void *)buf, buf_size,
+					   grps_added, cd);
+}
+
+/**
+ * ice_aq_cfg_sched_elems - configures scheduler elements
+ * @hw: pointer to the HW struct
+ * @elems_req: number of elements to configure
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_cfgd: returns total number of elements configured
+ * @cd: pointer to command details structure or NULL
+ *
+ * Configure scheduling elements (0x0403)
+ */
+static enum ice_status
+ice_aq_cfg_sched_elems(struct ice_hw *hw, u16 elems_req,
+		       struct ice_aqc_txsched_elem_data *buf, u16 buf_size,
+		       u16 *elems_cfgd, struct ice_sq_cd *cd)
+{
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_cfg_sched_elems,
+					   elems_req, (void *)buf, buf_size,
+					   elems_cfgd, cd);
+}
+
+/**
+ * ice_aq_suspend_sched_elems - suspend scheduler elements
+ * @hw: pointer to the HW struct
+ * @elems_req: number of elements to suspend
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_ret: returns total number of elements suspended
+ * @cd: pointer to command details structure or NULL
+ *
+ * Suspend scheduling elements (0x0409)
+ */
+static enum ice_status
+ice_aq_suspend_sched_elems(struct ice_hw *hw, u16 elems_req, __le32 *buf,
+			   u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
+{
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_suspend_sched_elems,
+					   elems_req, (void *)buf, buf_size,
+					   elems_ret, cd);
+}
+
+/**
+ * ice_aq_resume_sched_elems - resume scheduler elements
+ * @hw: pointer to the HW struct
+ * @elems_req: number of elements to resume
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_ret: returns total number of elements resumed
+ * @cd: pointer to command details structure or NULL
+ *
+ * resume scheduling elements (0x040A)
+ */
+static enum ice_status
+ice_aq_resume_sched_elems(struct ice_hw *hw, u16 elems_req, __le32 *buf,
+			  u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
+{
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_resume_sched_elems,
+					   elems_req, (void *)buf, buf_size,
+					   elems_ret, cd);
+}
+
+/**
+ * ice_aq_query_sched_res - query scheduler resource
+ * @hw: pointer to the HW struct
+ * @buf_size: buffer size in bytes
+ * @buf: pointer to buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * Query scheduler resource allocation (0x0412)
+ */
+static enum ice_status
+ice_aq_query_sched_res(struct ice_hw *hw, u16 buf_size,
+		       struct ice_aqc_query_txsched_res_resp *buf,
+		       struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_sched_res);
+	return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+}
+
+/**
+ * ice_sched_suspend_resume_elems - suspend or resume HW nodes
+ * @hw: pointer to the HW struct
+ * @num_nodes: number of nodes
+ * @node_teids: array of node teids to be suspended or resumed
+ * @suspend: true means suspend / false means resume
+ *
+ * This function suspends or resumes HW nodes
+ */
+static enum ice_status
+ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids,
+			       bool suspend)
+{
+	u16 i, buf_size, num_elem_ret = 0;
+	enum ice_status status;
+	__le32 *buf;
+
+	buf_size = sizeof(*buf) * num_nodes;
+	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
+	if (!buf)
+		return ICE_ERR_NO_MEMORY;
+
+	for (i = 0; i < num_nodes; i++)
+		buf[i] = cpu_to_le32(node_teids[i]);
+
+	if (suspend)
+		status = ice_aq_suspend_sched_elems(hw, num_nodes, buf,
+						    buf_size, &num_elem_ret,
+						    NULL);
+	else
+		status = ice_aq_resume_sched_elems(hw, num_nodes, buf,
+						   buf_size, &num_elem_ret,
+						   NULL);
+	if (status || num_elem_ret != num_nodes)
+		ice_debug(hw, ICE_DBG_SCHED, "suspend/resume failed\n");
+
+	devm_kfree(ice_hw_to_dev(hw), buf);
+	return status;
+}
+
+/**
+ * ice_alloc_lan_q_ctx - allocate LAN queue contexts for the given VSI and TC
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ * @tc: TC number
+ * @new_numqs: number of queues
+ */
+static enum ice_status
+ice_alloc_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
+{
+	struct ice_vsi_ctx *vsi_ctx;
+	struct ice_q_ctx *q_ctx;
+
+	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!vsi_ctx)
+		return ICE_ERR_PARAM;
+	/* allocate LAN queue contexts */
+	if (!vsi_ctx->lan_q_ctx[tc]) {
+		vsi_ctx->lan_q_ctx[tc] = devm_kcalloc(ice_hw_to_dev(hw),
+						      new_numqs,
+						      sizeof(*q_ctx),
+						      GFP_KERNEL);
+		if (!vsi_ctx->lan_q_ctx[tc])
+			return ICE_ERR_NO_MEMORY;
+		vsi_ctx->num_lan_q_entries[tc] = new_numqs;
+		return 0;
+	}
+	/* num queues are increased, update the queue contexts */
+	if (new_numqs > vsi_ctx->num_lan_q_entries[tc]) {
+		u16 prev_num = vsi_ctx->num_lan_q_entries[tc];
+
+		q_ctx = devm_kcalloc(ice_hw_to_dev(hw), new_numqs,
+				     sizeof(*q_ctx), GFP_KERNEL);
+		if (!q_ctx)
+			return ICE_ERR_NO_MEMORY;
+		memcpy(q_ctx, vsi_ctx->lan_q_ctx[tc],
+		       prev_num * sizeof(*q_ctx));
+		devm_kfree(ice_hw_to_dev(hw), vsi_ctx->lan_q_ctx[tc]);
+		vsi_ctx->lan_q_ctx[tc] = q_ctx;
+		vsi_ctx->num_lan_q_entries[tc] = new_numqs;
+	}
+	return 0;
+}
+
+/**
+ * ice_aq_rl_profile - performs a rate limiting task
+ * @hw: pointer to the HW struct
+ * @opcode: opcode for add, query, or remove profile(s)
+ * @num_profiles: the number of profiles
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_processed: number of processed add or remove profile(s) to return
+ * @cd: pointer to command details structure
+ *
+ * RL profile function to add, query, or remove profile(s)
+ */
+static enum ice_status
+ice_aq_rl_profile(struct ice_hw *hw, enum ice_adminq_opc opcode,
+		  u16 num_profiles, struct ice_aqc_rl_profile_elem *buf,
+		  u16 buf_size, u16 *num_processed, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_rl_profile *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.rl_profile;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, opcode);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	cmd->num_profiles = cpu_to_le16(num_profiles);
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status && num_processed)
+		*num_processed = le16_to_cpu(cmd->num_processed);
+	return status;
+}
+
+/**
+ * ice_aq_add_rl_profile - adds rate limiting profile(s)
+ * @hw: pointer to the HW struct
+ * @num_profiles: the number of profile(s) to be add
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_profiles_added: total number of profiles added to return
+ * @cd: pointer to command details structure
+ *
+ * Add RL profile (0x0410)
+ */
+static enum ice_status
+ice_aq_add_rl_profile(struct ice_hw *hw, u16 num_profiles,
+		      struct ice_aqc_rl_profile_elem *buf, u16 buf_size,
+		      u16 *num_profiles_added, struct ice_sq_cd *cd)
+{
+	return ice_aq_rl_profile(hw, ice_aqc_opc_add_rl_profiles, num_profiles,
+				 buf, buf_size, num_profiles_added, cd);
+}
+
+/**
+ * ice_aq_remove_rl_profile - removes RL profile(s)
+ * @hw: pointer to the HW struct
+ * @num_profiles: the number of profile(s) to remove
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_profiles_removed: total number of profiles removed to return
+ * @cd: pointer to command details structure or NULL
+ *
+ * Remove RL profile (0x0415)
+ */
+static enum ice_status
+ice_aq_remove_rl_profile(struct ice_hw *hw, u16 num_profiles,
+			 struct ice_aqc_rl_profile_elem *buf, u16 buf_size,
+			 u16 *num_profiles_removed, struct ice_sq_cd *cd)
+{
+	return ice_aq_rl_profile(hw, ice_aqc_opc_remove_rl_profiles,
+				 num_profiles, buf, buf_size,
+				 num_profiles_removed, cd);
+}
+
+/**
+ * ice_sched_del_rl_profile - remove RL profile
+ * @hw: pointer to the HW struct
+ * @rl_info: rate limit profile information
+ *
+ * If the profile ID is not referenced anymore, it removes profile ID with
+ * its associated parameters from HW DB,and locally. The caller needs to
+ * hold scheduler lock.
+ */
+static enum ice_status
+ice_sched_del_rl_profile(struct ice_hw *hw,
+			 struct ice_aqc_rl_profile_info *rl_info)
+{
+	struct ice_aqc_rl_profile_elem *buf;
+	u16 num_profiles_removed;
+	enum ice_status status;
+	u16 num_profiles = 1;
+
+	if (rl_info->prof_id_ref != 0)
+		return ICE_ERR_IN_USE;
+
+	/* Safe to remove profile ID */
+	buf = &rl_info->profile;
+	status = ice_aq_remove_rl_profile(hw, num_profiles, buf, sizeof(*buf),
+					  &num_profiles_removed, NULL);
+	if (status || num_profiles_removed != num_profiles)
+		return ICE_ERR_CFG;
+
+	/* Delete stale entry now */
+	list_del(&rl_info->list_entry);
+	devm_kfree(ice_hw_to_dev(hw), rl_info);
+	return status;
+}
+
+/**
+ * ice_sched_clear_rl_prof - clears RL prof entries
+ * @pi: port information structure
+ *
+ * This function removes all RL profile from HW as well as from SW DB.
+ */
+static void ice_sched_clear_rl_prof(struct ice_port_info *pi)
+{
+	u16 ln;
+
+	for (ln = 0; ln < pi->hw->num_tx_sched_layers; ln++) {
+		struct ice_aqc_rl_profile_info *rl_prof_elem;
+		struct ice_aqc_rl_profile_info *rl_prof_tmp;
+
+		list_for_each_entry_safe(rl_prof_elem, rl_prof_tmp,
+					 &pi->rl_prof_list[ln], list_entry) {
+			struct ice_hw *hw = pi->hw;
+			enum ice_status status;
+
+			rl_prof_elem->prof_id_ref = 0;
+			status = ice_sched_del_rl_profile(hw, rl_prof_elem);
+			if (status) {
+				ice_debug(hw, ICE_DBG_SCHED,
+					  "Remove rl profile failed\n");
+				/* On error, free mem required */
+				list_del(&rl_prof_elem->list_entry);
+				devm_kfree(ice_hw_to_dev(hw), rl_prof_elem);
+			}
+		}
+	}
+}
+
+/**
+ * ice_sched_clear_agg - clears the aggregator related information
+ * @hw: pointer to the hardware structure
+ *
+ * This function removes aggregator list and free up aggregator related memory
+ * previously allocated.
+ */
+void ice_sched_clear_agg(struct ice_hw *hw)
+{
+	struct ice_sched_agg_info *agg_info;
+	struct ice_sched_agg_info *atmp;
+
+	list_for_each_entry_safe(agg_info, atmp, &hw->agg_list, list_entry) {
+		struct ice_sched_agg_vsi_info *agg_vsi_info;
+		struct ice_sched_agg_vsi_info *vtmp;
+
+		list_for_each_entry_safe(agg_vsi_info, vtmp,
+					 &agg_info->agg_vsi_list, list_entry) {
+			list_del(&agg_vsi_info->list_entry);
+			devm_kfree(ice_hw_to_dev(hw), agg_vsi_info);
+		}
+		list_del(&agg_info->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), agg_info);
+	}
+}
+
+/**
+ * ice_sched_clear_tx_topo - clears the scheduler tree nodes
+ * @pi: port information structure
+ *
+ * This function removes all the nodes from HW as well as from SW DB.
+ */
+static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
+{
+	if (!pi)
+		return;
+	/* remove RL profiles related lists */
+	ice_sched_clear_rl_prof(pi);
+	if (pi->root) {
+		ice_free_sched_node(pi, pi->root);
+		pi->root = NULL;
+	}
+}
+
+/**
+ * ice_sched_clear_port - clear the scheduler elements from SW DB for a port
+ * @pi: port information structure
+ *
+ * Cleanup scheduling elements from SW DB
+ */
+void ice_sched_clear_port(struct ice_port_info *pi)
+{
+	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+		return;
+
+	pi->port_state = ICE_SCHED_PORT_STATE_INIT;
+	mutex_lock(&pi->sched_lock);
+	ice_sched_clear_tx_topo(pi);
+	mutex_unlock(&pi->sched_lock);
+	mutex_destroy(&pi->sched_lock);
+}
+
+/**
+ * ice_sched_cleanup_all - cleanup scheduler elements from SW DB for all ports
+ * @hw: pointer to the HW struct
+ *
+ * Cleanup scheduling elements from SW DB for all the ports
+ */
+void ice_sched_cleanup_all(struct ice_hw *hw)
+{
+	if (!hw)
+		return;
+
+	if (hw->layer_info) {
+		devm_kfree(ice_hw_to_dev(hw), hw->layer_info);
+		hw->layer_info = NULL;
+	}
+
+	ice_sched_clear_port(hw->port_info);
+
+	hw->num_tx_sched_layers = 0;
+	hw->num_tx_sched_phys_layers = 0;
+	hw->flattened_layers = 0;
+	hw->max_cgds = 0;
+}
+
+/**
+ * ice_sched_add_elems - add nodes to HW and SW DB
+ * @pi: port information structure
+ * @tc_node: pointer to the branch node
+ * @parent: pointer to the parent node
+ * @layer: layer number to add nodes
+ * @num_nodes: number of nodes
+ * @num_nodes_added: pointer to num nodes added
+ * @first_node_teid: if new nodes are added then return the TEID of first node
+ *
+ * This function add nodes to HW as well as to SW DB for a given layer
+ */
+static enum ice_status
+ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
+		    struct ice_sched_node *parent, u8 layer, u16 num_nodes,
+		    u16 *num_nodes_added, u32 *first_node_teid)
+{
+	struct ice_sched_node *prev, *new_node;
+	struct ice_aqc_add_elem *buf;
+	u16 i, num_groups_added = 0;
+	enum ice_status status = 0;
+	struct ice_hw *hw = pi->hw;
+	size_t buf_size;
+	u32 teid;
+
+	buf_size = struct_size(buf, generic, num_nodes);
+	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
+	if (!buf)
+		return ICE_ERR_NO_MEMORY;
+
+	buf->hdr.parent_teid = parent->info.node_teid;
+	buf->hdr.num_elems = cpu_to_le16(num_nodes);
+	for (i = 0; i < num_nodes; i++) {
+		buf->generic[i].parent_teid = parent->info.node_teid;
+		buf->generic[i].data.elem_type = ICE_AQC_ELEM_TYPE_SE_GENERIC;
+		buf->generic[i].data.valid_sections =
+			ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
+			ICE_AQC_ELEM_VALID_EIR;
+		buf->generic[i].data.generic = 0;
+		buf->generic[i].data.cir_bw.bw_profile_idx =
+			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+		buf->generic[i].data.cir_bw.bw_alloc =
+			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+		buf->generic[i].data.eir_bw.bw_profile_idx =
+			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+		buf->generic[i].data.eir_bw.bw_alloc =
+			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+	}
+
+	status = ice_aq_add_sched_elems(hw, 1, buf, buf_size,
+					&num_groups_added, NULL);
+	if (status || num_groups_added != 1) {
+		ice_debug(hw, ICE_DBG_SCHED, "add node failed FW Error %d\n",
+			  hw->adminq.sq_last_status);
+		devm_kfree(ice_hw_to_dev(hw), buf);
+		return ICE_ERR_CFG;
+	}
+
+	*num_nodes_added = num_nodes;
+	/* add nodes to the SW DB */
+	for (i = 0; i < num_nodes; i++) {
+		status = ice_sched_add_node(pi, layer, &buf->generic[i]);
+		if (status) {
+			ice_debug(hw, ICE_DBG_SCHED,
+				  "add nodes in SW DB failed status =%d\n",
+				  status);
+			break;
+		}
+
+		teid = le32_to_cpu(buf->generic[i].node_teid);
+		new_node = ice_sched_find_node_by_teid(parent, teid);
+		if (!new_node) {
+			ice_debug(hw, ICE_DBG_SCHED,
+				  "Node is missing for teid =%d\n", teid);
+			break;
+		}
+
+		new_node->sibling = NULL;
+		new_node->tc_num = tc_node->tc_num;
+
+		/* add it to previous node sibling pointer */
+		/* Note: siblings are not linked across branches */
+		prev = ice_sched_get_first_node(pi, tc_node, layer);
+		if (prev && prev != new_node) {
+			while (prev->sibling)
+				prev = prev->sibling;
+			prev->sibling = new_node;
+		}
+
+		/* initialize the sibling head */
+		if (!pi->sib_head[tc_node->tc_num][layer])
+			pi->sib_head[tc_node->tc_num][layer] = new_node;
+
+		if (i == 0)
+			*first_node_teid = teid;
+	}
+
+	devm_kfree(ice_hw_to_dev(hw), buf);
+	return status;
+}
+
+/**
+ * ice_sched_add_nodes_to_layer - Add nodes to a given layer
+ * @pi: port information structure
+ * @tc_node: pointer to TC node
+ * @parent: pointer to parent node
+ * @layer: layer number to add nodes
+ * @num_nodes: number of nodes to be added
+ * @first_node_teid: pointer to the first node TEID
+ * @num_nodes_added: pointer to number of nodes added
+ *
+ * This function add nodes to a given layer.
+ */
+static enum ice_status
+ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
+			     struct ice_sched_node *tc_node,
+			     struct ice_sched_node *parent, u8 layer,
+			     u16 num_nodes, u32 *first_node_teid,
+			     u16 *num_nodes_added)
+{
+	u32 *first_teid_ptr = first_node_teid;
+	u16 new_num_nodes, max_child_nodes;
+	enum ice_status status = 0;
+	struct ice_hw *hw = pi->hw;
+	u16 num_added = 0;
+	u32 temp;
+
+	*num_nodes_added = 0;
+
+	if (!num_nodes)
+		return status;
+
+	if (!parent || layer < hw->sw_entry_point_layer)
+		return ICE_ERR_PARAM;
+
+	/* max children per node per layer */
+	max_child_nodes = hw->max_children[parent->tx_sched_layer];
+
+	/* current number of children + required nodes exceed max children ? */
+	if ((parent->num_children + num_nodes) > max_child_nodes) {
+		/* Fail if the parent is a TC node */
+		if (parent == tc_node)
+			return ICE_ERR_CFG;
+
+		/* utilize all the spaces if the parent is not full */
+		if (parent->num_children < max_child_nodes) {
+			new_num_nodes = max_child_nodes - parent->num_children;
+			/* this recursion is intentional, and wouldn't
+			 * go more than 2 calls
+			 */
+			status = ice_sched_add_nodes_to_layer(pi, tc_node,
+							      parent, layer,
+							      new_num_nodes,
+							      first_node_teid,
+							      &num_added);
+			if (status)
+				return status;
+
+			*num_nodes_added += num_added;
+		}
+		/* Don't modify the first node TEID memory if the first node was
+		 * added already in the above call. Instead send some temp
+		 * memory for all other recursive calls.
+		 */
+		if (num_added)
+			first_teid_ptr = &temp;
+
+		new_num_nodes = num_nodes - num_added;
+
+		/* This parent is full, try the next sibling */
+		parent = parent->sibling;
+
+		/* this recursion is intentional, for 1024 queues
+		 * per VSI, it goes max of 16 iterations.
+		 * 1024 / 8 = 128 layer 8 nodes
+		 * 128 /8 = 16 (add 8 nodes per iteration)
+		 */
+		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
+						      layer, new_num_nodes,
+						      first_teid_ptr,
+						      &num_added);
+		*num_nodes_added += num_added;
+		return status;
+	}
+
+	status = ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes,
+				     num_nodes_added, first_node_teid);
+	return status;
+}
+
+/**
+ * ice_sched_get_qgrp_layer - get the current queue group layer number
+ * @hw: pointer to the HW struct
+ *
+ * This function returns the current queue group layer number
+ */
+static u8 ice_sched_get_qgrp_layer(struct ice_hw *hw)
+{
+	/* It's always total layers - 1, the array is 0 relative so -2 */
+	return hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET;
+}
+
+/**
+ * ice_sched_get_vsi_layer - get the current VSI layer number
+ * @hw: pointer to the HW struct
+ *
+ * This function returns the current VSI layer number
+ */
+static u8 ice_sched_get_vsi_layer(struct ice_hw *hw)
+{
+	/* Num Layers       VSI layer
+	 *     9               6
+	 *     7               4
+	 *     5 or less       sw_entry_point_layer
+	 */
+	/* calculate the VSI layer based on number of layers. */
+	if (hw->num_tx_sched_layers > ICE_VSI_LAYER_OFFSET + 1) {
+		u8 layer = hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET;
+
+		if (layer > hw->sw_entry_point_layer)
+			return layer;
+	}
+	return hw->sw_entry_point_layer;
+}
+
+/**
+ * ice_rm_dflt_leaf_node - remove the default leaf node in the tree
+ * @pi: port information structure
+ *
+ * This function removes the leaf node that was created by the FW
+ * during initialization
+ */
+static void ice_rm_dflt_leaf_node(struct ice_port_info *pi)
+{
+	struct ice_sched_node *node;
+
+	node = pi->root;
+	while (node) {
+		if (!node->num_children)
+			break;
+		node = node->children[0];
+	}
+	if (node && node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF) {
+		u32 teid = le32_to_cpu(node->info.node_teid);
+		enum ice_status status;
+
+		/* remove the default leaf node */
+		status = ice_sched_remove_elems(pi->hw, node->parent, 1, &teid);
+		if (!status)
+			ice_free_sched_node(pi, node);
+	}
+}
+
+/**
+ * ice_sched_rm_dflt_nodes - free the default nodes in the tree
+ * @pi: port information structure
+ *
+ * This function frees all the nodes except root and TC that were created by
+ * the FW during initialization
+ */
+static void ice_sched_rm_dflt_nodes(struct ice_port_info *pi)
+{
+	struct ice_sched_node *node;
+
+	ice_rm_dflt_leaf_node(pi);
+
+	/* remove the default nodes except TC and root nodes */
+	node = pi->root;
+	while (node) {
+		if (node->tx_sched_layer >= pi->hw->sw_entry_point_layer &&
+		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
+		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT) {
+			ice_free_sched_node(pi, node);
+			break;
+		}
+
+		if (!node->num_children)
+			break;
+		node = node->children[0];
+	}
+}
+
+/**
+ * ice_sched_init_port - Initialize scheduler by querying information from FW
+ * @pi: port info structure for the tree to cleanup
+ *
+ * This function is the initial call to find the total number of Tx scheduler
+ * resources, default topology created by firmware and storing the information
+ * in SW DB.
+ */
+enum ice_status ice_sched_init_port(struct ice_port_info *pi)
+{
+	struct ice_aqc_get_topo_elem *buf;
+	enum ice_status status;
+	struct ice_hw *hw;
+	u8 num_branches;
+	u16 num_elems;
+	u8 i, j;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+	hw = pi->hw;
+
+	/* Query the Default Topology from FW */
+	buf = devm_kzalloc(ice_hw_to_dev(hw), ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
+	if (!buf)
+		return ICE_ERR_NO_MEMORY;
+
+	/* Query default scheduling tree topology */
+	status = ice_aq_get_dflt_topo(hw, pi->lport, buf, ICE_AQ_MAX_BUF_LEN,
+				      &num_branches, NULL);
+	if (status)
+		goto err_init_port;
+
+	/* num_branches should be between 1-8 */
+	if (num_branches < 1 || num_branches > ICE_TXSCHED_MAX_BRANCHES) {
+		ice_debug(hw, ICE_DBG_SCHED, "num_branches unexpected %d\n",
+			  num_branches);
+		status = ICE_ERR_PARAM;
+		goto err_init_port;
+	}
+
+	/* get the number of elements on the default/first branch */
+	num_elems = le16_to_cpu(buf[0].hdr.num_elems);
+
+	/* num_elems should always be between 1-9 */
+	if (num_elems < 1 || num_elems > ICE_AQC_TOPO_MAX_LEVEL_NUM) {
+		ice_debug(hw, ICE_DBG_SCHED, "num_elems unexpected %d\n",
+			  num_elems);
+		status = ICE_ERR_PARAM;
+		goto err_init_port;
+	}
+
+	/* If the last node is a leaf node then the index of the queue group
+	 * layer is two less than the number of elements.
+	 */
+	if (num_elems > 2 && buf[0].generic[num_elems - 1].data.elem_type ==
+	    ICE_AQC_ELEM_TYPE_LEAF)
+		pi->last_node_teid =
+			le32_to_cpu(buf[0].generic[num_elems - 2].node_teid);
+	else
+		pi->last_node_teid =
+			le32_to_cpu(buf[0].generic[num_elems - 1].node_teid);
+
+	/* Insert the Tx Sched root node */
+	status = ice_sched_add_root_node(pi, &buf[0].generic[0]);
+	if (status)
+		goto err_init_port;
+
+	/* Parse the default tree and cache the information */
+	for (i = 0; i < num_branches; i++) {
+		num_elems = le16_to_cpu(buf[i].hdr.num_elems);
+
+		/* Skip root element as already inserted */
+		for (j = 1; j < num_elems; j++) {
+			/* update the sw entry point */
+			if (buf[0].generic[j].data.elem_type ==
+			    ICE_AQC_ELEM_TYPE_ENTRY_POINT)
+				hw->sw_entry_point_layer = j;
+
+			status = ice_sched_add_node(pi, j, &buf[i].generic[j]);
+			if (status)
+				goto err_init_port;
+		}
+	}
+
+	/* Remove the default nodes. */
+	if (pi->root)
+		ice_sched_rm_dflt_nodes(pi);
+
+	/* initialize the port for handling the scheduler tree */
+	pi->port_state = ICE_SCHED_PORT_STATE_READY;
+	mutex_init(&pi->sched_lock);
+	for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
+		INIT_LIST_HEAD(&pi->rl_prof_list[i]);
+
+err_init_port:
+	if (status && pi->root) {
+		ice_free_sched_node(pi, pi->root);
+		pi->root = NULL;
+	}
+
+	devm_kfree(ice_hw_to_dev(hw), buf);
+	return status;
+}
+
+/**
+ * ice_sched_query_res_alloc - query the FW for num of logical sched layers
+ * @hw: pointer to the HW struct
+ *
+ * query FW for allocated scheduler resources and store in HW struct
+ */
+enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw)
+{
+	struct ice_aqc_query_txsched_res_resp *buf;
+	enum ice_status status = 0;
+	__le16 max_sibl;
+	u16 i;
+
+	if (hw->layer_info)
+		return status;
+
+	buf = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return ICE_ERR_NO_MEMORY;
+
+	status = ice_aq_query_sched_res(hw, sizeof(*buf), buf, NULL);
+	if (status)
+		goto sched_query_out;
+
+	hw->num_tx_sched_layers = le16_to_cpu(buf->sched_props.logical_levels);
+	hw->num_tx_sched_phys_layers =
+		le16_to_cpu(buf->sched_props.phys_levels);
+	hw->flattened_layers = buf->sched_props.flattening_bitmap;
+	hw->max_cgds = buf->sched_props.max_pf_cgds;
+
+	/* max sibling group size of current layer refers to the max children
+	 * of the below layer node.
+	 * layer 1 node max children will be layer 2 max sibling group size
+	 * layer 2 node max children will be layer 3 max sibling group size
+	 * and so on. This array will be populated from root (index 0) to
+	 * qgroup layer 7. Leaf node has no children.
+	 */
+	for (i = 0; i < hw->num_tx_sched_layers - 1; i++) {
+		max_sibl = buf->layer_props[i + 1].max_sibl_grp_sz;
+		hw->max_children[i] = le16_to_cpu(max_sibl);
+	}
+
+	hw->layer_info = devm_kmemdup(ice_hw_to_dev(hw), buf->layer_props,
+				      (hw->num_tx_sched_layers *
+				       sizeof(*hw->layer_info)),
+				      GFP_KERNEL);
+	if (!hw->layer_info) {
+		status = ICE_ERR_NO_MEMORY;
+		goto sched_query_out;
+	}
+
+sched_query_out:
+	devm_kfree(ice_hw_to_dev(hw), buf);
+	return status;
+}
+
+/**
+ * ice_sched_find_node_in_subtree - Find node in part of base node subtree
+ * @hw: pointer to the HW struct
+ * @base: pointer to the base node
+ * @node: pointer to the node to search
+ *
+ * This function checks whether a given node is part of the base node
+ * subtree or not
+ */
+static bool
+ice_sched_find_node_in_subtree(struct ice_hw *hw, struct ice_sched_node *base,
+			       struct ice_sched_node *node)
+{
+	u8 i;
+
+	for (i = 0; i < base->num_children; i++) {
+		struct ice_sched_node *child = base->children[i];
+
+		if (node == child)
+			return true;
+
+		if (child->tx_sched_layer > node->tx_sched_layer)
+			return false;
+
+		/* this recursion is intentional, and wouldn't
+		 * go more than 8 calls
+		 */
+		if (ice_sched_find_node_in_subtree(hw, child, node))
+			return true;
+	}
+	return false;
+}
+
+/**
+ * ice_sched_get_free_qgrp - Scan all queue group siblings and find a free node
+ * @pi: port information structure
+ * @vsi_node: software VSI handle
+ * @qgrp_node: first queue group node identified for scanning
+ * @owner: LAN or RDMA
+ *
+ * This function retrieves a free LAN or RDMA queue group node by scanning
+ * qgrp_node and its siblings for the queue group with the fewest number
+ * of queues currently assigned.
+ */
+static struct ice_sched_node *
+ice_sched_get_free_qgrp(struct ice_port_info *pi,
+			struct ice_sched_node *vsi_node,
+			struct ice_sched_node *qgrp_node, u8 owner)
+{
+	struct ice_sched_node *min_qgrp;
+	u8 min_children;
+
+	if (!qgrp_node)
+		return qgrp_node;
+	min_children = qgrp_node->num_children;
+	if (!min_children)
+		return qgrp_node;
+	min_qgrp = qgrp_node;
+	/* scan all queue groups until find a node which has less than the
+	 * minimum number of children. This way all queue group nodes get
+	 * equal number of shares and active. The bandwidth will be equally
+	 * distributed across all queues.
+	 */
+	while (qgrp_node) {
+		/* make sure the qgroup node is part of the VSI subtree */
+		if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
+			if (qgrp_node->num_children < min_children &&
+			    qgrp_node->owner == owner) {
+				/* replace the new min queue group node */
+				min_qgrp = qgrp_node;
+				min_children = min_qgrp->num_children;
+				/* break if it has no children, */
+				if (!min_children)
+					break;
+			}
+		qgrp_node = qgrp_node->sibling;
+	}
+	return min_qgrp;
+}
+
+/**
+ * ice_sched_get_free_qparent - Get a free LAN or RDMA queue group node
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: branch number
+ * @owner: LAN or RDMA
+ *
+ * This function retrieves a free LAN or RDMA queue group node
+ */
+struct ice_sched_node *
+ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+			   u8 owner)
+{
+	struct ice_sched_node *vsi_node, *qgrp_node;
+	struct ice_vsi_ctx *vsi_ctx;
+	u16 max_children;
+	u8 qgrp_layer;
+
+	qgrp_layer = ice_sched_get_qgrp_layer(pi->hw);
+	max_children = pi->hw->max_children[qgrp_layer];
+
+	vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
+	if (!vsi_ctx)
+		return NULL;
+	vsi_node = vsi_ctx->sched.vsi_node[tc];
+	/* validate invalid VSI ID */
+	if (!vsi_node)
+		return NULL;
+
+	/* get the first queue group node from VSI sub-tree */
+	qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer);
+	while (qgrp_node) {
+		/* make sure the qgroup node is part of the VSI subtree */
+		if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
+			if (qgrp_node->num_children < max_children &&
+			    qgrp_node->owner == owner)
+				break;
+		qgrp_node = qgrp_node->sibling;
+	}
+
+	/* Select the best queue group */
+	return ice_sched_get_free_qgrp(pi, vsi_node, qgrp_node, owner);
+}
+
+/**
+ * ice_sched_get_vsi_node - Get a VSI node based on VSI ID
+ * @hw: pointer to the HW struct
+ * @tc_node: pointer to the TC node
+ * @vsi_handle: software VSI handle
+ *
+ * This function retrieves a VSI node for a given VSI ID from a given
+ * TC branch
+ */
+static struct ice_sched_node *
+ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node,
+		       u16 vsi_handle)
+{
+	struct ice_sched_node *node;
+	u8 vsi_layer;
+
+	vsi_layer = ice_sched_get_vsi_layer(hw);
+	node = ice_sched_get_first_node(hw->port_info, tc_node, vsi_layer);
+
+	/* Check whether it already exists */
+	while (node) {
+		if (node->vsi_handle == vsi_handle)
+			return node;
+		node = node->sibling;
+	}
+
+	return node;
+}
+
+/**
+ * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
+ * @hw: pointer to the HW struct
+ * @num_qs: number of queues
+ * @num_nodes: num nodes array
+ *
+ * This function calculates the number of VSI child nodes based on the
+ * number of queues.
+ */
+static void
+ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes)
+{
+	u16 num = num_qs;
+	u8 i, qgl, vsil;
+
+	qgl = ice_sched_get_qgrp_layer(hw);
+	vsil = ice_sched_get_vsi_layer(hw);
+
+	/* calculate num nodes from queue group to VSI layer */
+	for (i = qgl; i > vsil; i--) {
+		/* round to the next integer if there is a remainder */
+		num = DIV_ROUND_UP(num, hw->max_children[i]);
+
+		/* need at least one node */
+		num_nodes[i] = num ? num : 1;
+	}
+}
+
+/**
+ * ice_sched_add_vsi_child_nodes - add VSI child nodes to tree
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc_node: pointer to the TC node
+ * @num_nodes: pointer to the num nodes that needs to be added per layer
+ * @owner: node owner (LAN or RDMA)
+ *
+ * This function adds the VSI child nodes to tree. It gets called for
+ * LAN and RDMA separately.
+ */
+static enum ice_status
+ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
+			      struct ice_sched_node *tc_node, u16 *num_nodes,
+			      u8 owner)
+{
+	struct ice_sched_node *parent, *node;
+	struct ice_hw *hw = pi->hw;
+	enum ice_status status;
+	u32 first_node_teid;
+	u16 num_added = 0;
+	u8 i, qgl, vsil;
+
+	qgl = ice_sched_get_qgrp_layer(hw);
+	vsil = ice_sched_get_vsi_layer(hw);
+	parent = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
+	for (i = vsil + 1; i <= qgl; i++) {
+		if (!parent)
+			return ICE_ERR_CFG;
+
+		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
+						      num_nodes[i],
+						      &first_node_teid,
+						      &num_added);
+		if (status || num_nodes[i] != num_added)
+			return ICE_ERR_CFG;
+
+		/* The newly added node can be a new parent for the next
+		 * layer nodes
+		 */
+		if (num_added) {
+			parent = ice_sched_find_node_by_teid(tc_node,
+							     first_node_teid);
+			node = parent;
+			while (node) {
+				node->owner = owner;
+				node = node->sibling;
+			}
+		} else {
+			parent = parent->children[0];
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes
+ * @hw: pointer to the HW struct
+ * @tc_node: pointer to TC node
+ * @num_nodes: pointer to num nodes array
+ *
+ * This function calculates the number of supported nodes needed to add this
+ * VSI into Tx tree including the VSI, parent and intermediate nodes in below
+ * layers
+ */
+static void
+ice_sched_calc_vsi_support_nodes(struct ice_hw *hw,
+				 struct ice_sched_node *tc_node, u16 *num_nodes)
+{
+	struct ice_sched_node *node;
+	u8 vsil;
+	int i;
+
+	vsil = ice_sched_get_vsi_layer(hw);
+	for (i = vsil; i >= hw->sw_entry_point_layer; i--)
+		/* Add intermediate nodes if TC has no children and
+		 * need at least one node for VSI
+		 */
+		if (!tc_node->num_children || i == vsil) {
+			num_nodes[i]++;
+		} else {
+			/* If intermediate nodes are reached max children
+			 * then add a new one.
+			 */
+			node = ice_sched_get_first_node(hw->port_info, tc_node,
+							(u8)i);
+			/* scan all the siblings */
+			while (node) {
+				if (node->num_children < hw->max_children[i])
+					break;
+				node = node->sibling;
+			}
+
+			/* tree has one intermediate node to add this new VSI.
+			 * So no need to calculate supported nodes for below
+			 * layers.
+			 */
+			if (node)
+				break;
+			/* all the nodes are full, allocate a new one */
+			num_nodes[i]++;
+		}
+}
+
+/**
+ * ice_sched_add_vsi_support_nodes - add VSI supported nodes into Tx tree
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc_node: pointer to TC node
+ * @num_nodes: pointer to num nodes array
+ *
+ * This function adds the VSI supported nodes into Tx tree including the
+ * VSI, its parent and intermediate nodes in below layers
+ */
+static enum ice_status
+ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_handle,
+				struct ice_sched_node *tc_node, u16 *num_nodes)
+{
+	struct ice_sched_node *parent = tc_node;
+	enum ice_status status;
+	u32 first_node_teid;
+	u16 num_added = 0;
+	u8 i, vsil;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	vsil = ice_sched_get_vsi_layer(pi->hw);
+	for (i = pi->hw->sw_entry_point_layer; i <= vsil; i++) {
+		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
+						      i, num_nodes[i],
+						      &first_node_teid,
+						      &num_added);
+		if (status || num_nodes[i] != num_added)
+			return ICE_ERR_CFG;
+
+		/* The newly added node can be a new parent for the next
+		 * layer nodes
+		 */
+		if (num_added)
+			parent = ice_sched_find_node_by_teid(tc_node,
+							     first_node_teid);
+		else
+			parent = parent->children[0];
+
+		if (!parent)
+			return ICE_ERR_CFG;
+
+		if (i == vsil)
+			parent->vsi_handle = vsi_handle;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_sched_add_vsi_to_topo - add a new VSI into tree
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ *
+ * This function adds a new VSI into scheduler tree
+ */
+static enum ice_status
+ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc)
+{
+	u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+	struct ice_sched_node *tc_node;
+	struct ice_hw *hw = pi->hw;
+
+	tc_node = ice_sched_get_tc_node(pi, tc);
+	if (!tc_node)
+		return ICE_ERR_PARAM;
+
+	/* calculate number of supported nodes needed for this VSI */
+	ice_sched_calc_vsi_support_nodes(hw, tc_node, num_nodes);
+
+	/* add VSI supported nodes to TC subtree */
+	return ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node,
+					       num_nodes);
+}
+
+/**
+ * ice_sched_update_vsi_child_nodes - update VSI child nodes
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @new_numqs: new number of max queues
+ * @owner: owner of this subtree
+ *
+ * This function updates the VSI child nodes based on the number of queues
+ */
+static enum ice_status
+ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
+				 u8 tc, u16 new_numqs, u8 owner)
+{
+	u16 new_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+	struct ice_sched_node *vsi_node;
+	struct ice_sched_node *tc_node;
+	struct ice_vsi_ctx *vsi_ctx;
+	enum ice_status status = 0;
+	struct ice_hw *hw = pi->hw;
+	u16 prev_numqs;
+
+	tc_node = ice_sched_get_tc_node(pi, tc);
+	if (!tc_node)
+		return ICE_ERR_CFG;
+
+	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
+	if (!vsi_node)
+		return ICE_ERR_CFG;
+
+	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!vsi_ctx)
+		return ICE_ERR_PARAM;
+
+	prev_numqs = vsi_ctx->sched.max_lanq[tc];
+	/* num queues are not changed or less than the previous number */
+	if (new_numqs <= prev_numqs)
+		return status;
+	status = ice_alloc_lan_q_ctx(hw, vsi_handle, tc, new_numqs);
+	if (status)
+		return status;
+
+	if (new_numqs)
+		ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
+	/* Keep the max number of queue configuration all the time. Update the
+	 * tree only if number of queues > previous number of queues. This may
+	 * leave some extra nodes in the tree if number of queues < previous
+	 * number but that wouldn't harm anything. Removing those extra nodes
+	 * may complicate the code if those nodes are part of SRL or
+	 * individually rate limited.
+	 */
+	status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node,
+					       new_num_nodes, owner);
+	if (status)
+		return status;
+	vsi_ctx->sched.max_lanq[tc] = new_numqs;
+
+	return 0;
+}
+
+/**
+ * ice_sched_cfg_vsi - configure the new/existing VSI
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @maxqs: max number of queues
+ * @owner: LAN or RDMA
+ * @enable: TC enabled or disabled
+ *
+ * This function adds/updates VSI nodes based on the number of queues. If TC is
+ * enabled and VSI is in suspended state then resume the VSI back. If TC is
+ * disabled then suspend the VSI if it is not already.
+ */
+enum ice_status
+ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
+		  u8 owner, bool enable)
+{
+	struct ice_sched_node *vsi_node, *tc_node;
+	struct ice_vsi_ctx *vsi_ctx;
+	enum ice_status status = 0;
+	struct ice_hw *hw = pi->hw;
+
+	ice_debug(pi->hw, ICE_DBG_SCHED, "add/config VSI %d\n", vsi_handle);
+	tc_node = ice_sched_get_tc_node(pi, tc);
+	if (!tc_node)
+		return ICE_ERR_PARAM;
+	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!vsi_ctx)
+		return ICE_ERR_PARAM;
+	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
+
+	/* suspend the VSI if TC is not enabled */
+	if (!enable) {
+		if (vsi_node && vsi_node->in_use) {
+			u32 teid = le32_to_cpu(vsi_node->info.node_teid);
+
+			status = ice_sched_suspend_resume_elems(hw, 1, &teid,
+								true);
+			if (!status)
+				vsi_node->in_use = false;
+		}
+		return status;
+	}
+
+	/* TC is enabled, if it is a new VSI then add it to the tree */
+	if (!vsi_node) {
+		status = ice_sched_add_vsi_to_topo(pi, vsi_handle, tc);
+		if (status)
+			return status;
+
+		vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
+		if (!vsi_node)
+			return ICE_ERR_CFG;
+
+		vsi_ctx->sched.vsi_node[tc] = vsi_node;
+		vsi_node->in_use = true;
+		/* invalidate the max queues whenever VSI gets added first time
+		 * into the scheduler tree (boot or after reset). We need to
+		 * recreate the child nodes all the time in these cases.
+		 */
+		vsi_ctx->sched.max_lanq[tc] = 0;
+	}
+
+	/* update the VSI child nodes */
+	status = ice_sched_update_vsi_child_nodes(pi, vsi_handle, tc, maxqs,
+						  owner);
+	if (status)
+		return status;
+
+	/* TC is enabled, resume the VSI if it is in the suspend state */
+	if (!vsi_node->in_use) {
+		u32 teid = le32_to_cpu(vsi_node->info.node_teid);
+
+		status = ice_sched_suspend_resume_elems(hw, 1, &teid, false);
+		if (!status)
+			vsi_node->in_use = true;
+	}
+
+	return status;
+}
+
+/**
+ * ice_sched_rm_agg_vsi_entry - remove aggregator related VSI info entry
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ *
+ * This function removes single aggregator VSI info entry from
+ * aggregator list.
+ */
+static void ice_sched_rm_agg_vsi_info(struct ice_port_info *pi, u16 vsi_handle)
+{
+	struct ice_sched_agg_info *agg_info;
+	struct ice_sched_agg_info *atmp;
+
+	list_for_each_entry_safe(agg_info, atmp, &pi->hw->agg_list,
+				 list_entry) {
+		struct ice_sched_agg_vsi_info *agg_vsi_info;
+		struct ice_sched_agg_vsi_info *vtmp;
+
+		list_for_each_entry_safe(agg_vsi_info, vtmp,
+					 &agg_info->agg_vsi_list, list_entry)
+			if (agg_vsi_info->vsi_handle == vsi_handle) {
+				list_del(&agg_vsi_info->list_entry);
+				devm_kfree(ice_hw_to_dev(pi->hw),
+					   agg_vsi_info);
+				return;
+			}
+	}
+}
+
+/**
+ * ice_sched_is_leaf_node_present - check for a leaf node in the sub-tree
+ * @node: pointer to the sub-tree node
+ *
+ * This function checks for a leaf node presence in a given sub-tree node.
+ */
+static bool ice_sched_is_leaf_node_present(struct ice_sched_node *node)
+{
+	u8 i;
+
+	for (i = 0; i < node->num_children; i++)
+		if (ice_sched_is_leaf_node_present(node->children[i]))
+			return true;
+	/* check for a leaf node */
+	return (node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF);
+}
+
+/**
+ * ice_sched_rm_vsi_cfg - remove the VSI and its children nodes
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @owner: LAN or RDMA
+ *
+ * This function removes the VSI and its LAN or RDMA children nodes from the
+ * scheduler tree.
+ */
+static enum ice_status
+ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
+{
+	enum ice_status status = ICE_ERR_PARAM;
+	struct ice_vsi_ctx *vsi_ctx;
+	u8 i;
+
+	ice_debug(pi->hw, ICE_DBG_SCHED, "removing VSI %d\n", vsi_handle);
+	if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+		return status;
+	mutex_lock(&pi->sched_lock);
+	vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
+	if (!vsi_ctx)
+		goto exit_sched_rm_vsi_cfg;
+
+	ice_for_each_traffic_class(i) {
+		struct ice_sched_node *vsi_node, *tc_node;
+		u8 j = 0;
+
+		tc_node = ice_sched_get_tc_node(pi, i);
+		if (!tc_node)
+			continue;
+
+		vsi_node = ice_sched_get_vsi_node(pi->hw, tc_node, vsi_handle);
+		if (!vsi_node)
+			continue;
+
+		if (ice_sched_is_leaf_node_present(vsi_node)) {
+			ice_debug(pi->hw, ICE_DBG_SCHED,
+				  "VSI has leaf nodes in TC %d\n", i);
+			status = ICE_ERR_IN_USE;
+			goto exit_sched_rm_vsi_cfg;
+		}
+		while (j < vsi_node->num_children) {
+			if (vsi_node->children[j]->owner == owner) {
+				ice_free_sched_node(pi, vsi_node->children[j]);
+
+				/* reset the counter again since the num
+				 * children will be updated after node removal
+				 */
+				j = 0;
+			} else {
+				j++;
+			}
+		}
+		/* remove the VSI if it has no children */
+		if (!vsi_node->num_children) {
+			ice_free_sched_node(pi, vsi_node);
+			vsi_ctx->sched.vsi_node[i] = NULL;
+
+			/* clean up aggregator related VSI info if any */
+			ice_sched_rm_agg_vsi_info(pi, vsi_handle);
+		}
+		if (owner == ICE_SCHED_NODE_OWNER_LAN)
+			vsi_ctx->sched.max_lanq[i] = 0;
+	}
+	status = 0;
+
+exit_sched_rm_vsi_cfg:
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
+
+/**
+ * ice_rm_vsi_lan_cfg - remove VSI and its LAN children nodes
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ *
+ * This function clears the VSI and its LAN children nodes from scheduler tree
+ * for all TCs.
+ */
+enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle)
+{
+	return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_LAN);
+}
+
+/**
+ * ice_sched_rm_unused_rl_prof - remove unused RL profile
+ * @pi: port information structure
+ *
+ * This function removes unused rate limit profiles from the HW and
+ * SW DB. The caller needs to hold scheduler lock.
+ */
+static void ice_sched_rm_unused_rl_prof(struct ice_port_info *pi)
+{
+	u16 ln;
+
+	for (ln = 0; ln < pi->hw->num_tx_sched_layers; ln++) {
+		struct ice_aqc_rl_profile_info *rl_prof_elem;
+		struct ice_aqc_rl_profile_info *rl_prof_tmp;
+
+		list_for_each_entry_safe(rl_prof_elem, rl_prof_tmp,
+					 &pi->rl_prof_list[ln], list_entry) {
+			if (!ice_sched_del_rl_profile(pi->hw, rl_prof_elem))
+				ice_debug(pi->hw, ICE_DBG_SCHED,
+					  "Removed rl profile\n");
+		}
+	}
+}
+
+/**
+ * ice_sched_update_elem - update element
+ * @hw: pointer to the HW struct
+ * @node: pointer to node
+ * @info: node info to update
+ *
+ * Update the HW DB, and local SW DB of node. Update the scheduling
+ * parameters of node from argument info data buffer (Info->data buf) and
+ * returns success or error on config sched element failure. The caller
+ * needs to hold scheduler lock.
+ */
+static enum ice_status
+ice_sched_update_elem(struct ice_hw *hw, struct ice_sched_node *node,
+		      struct ice_aqc_txsched_elem_data *info)
+{
+	struct ice_aqc_txsched_elem_data buf;
+	enum ice_status status;
+	u16 elem_cfgd = 0;
+	u16 num_elems = 1;
+
+	buf = *info;
+	/* Parent TEID is reserved field in this aq call */
+	buf.parent_teid = 0;
+	/* Element type is reserved field in this aq call */
+	buf.data.elem_type = 0;
+	/* Flags is reserved field in this aq call */
+	buf.data.flags = 0;
+
+	/* Update HW DB */
+	/* Configure element node */
+	status = ice_aq_cfg_sched_elems(hw, num_elems, &buf, sizeof(buf),
+					&elem_cfgd, NULL);
+	if (status || elem_cfgd != num_elems) {
+		ice_debug(hw, ICE_DBG_SCHED, "Config sched elem error\n");
+		return ICE_ERR_CFG;
+	}
+
+	/* Config success case */
+	/* Now update local SW DB */
+	/* Only copy the data portion of info buffer */
+	node->info.data = info->data;
+	return status;
+}
+
+/**
+ * ice_sched_cfg_node_bw_alloc - configure node BW weight/alloc params
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @rl_type: rate limit type CIR, EIR, or shared
+ * @bw_alloc: BW weight/allocation
+ *
+ * This function configures node element's BW allocation.
+ */
+static enum ice_status
+ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node,
+			    enum ice_rl_type rl_type, u16 bw_alloc)
+{
+	struct ice_aqc_txsched_elem_data buf;
+	struct ice_aqc_txsched_elem *data;
+	enum ice_status status;
+
+	buf = node->info;
+	data = &buf.data;
+	if (rl_type == ICE_MIN_BW) {
+		data->valid_sections |= ICE_AQC_ELEM_VALID_CIR;
+		data->cir_bw.bw_alloc = cpu_to_le16(bw_alloc);
+	} else if (rl_type == ICE_MAX_BW) {
+		data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+		data->eir_bw.bw_alloc = cpu_to_le16(bw_alloc);
+	} else {
+		return ICE_ERR_PARAM;
+	}
+
+	/* Configure element */
+	status = ice_sched_update_elem(hw, node, &buf);
+	return status;
+}
+
+/**
+ * ice_set_clear_cir_bw - set or clear CIR BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear CIR bandwidth (BW) in the passed param bw_t_info.
+ */
+static void ice_set_clear_cir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+	if (bw == ICE_SCHED_DFLT_BW) {
+		clear_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->cir_bw.bw = 0;
+	} else {
+		/* Save type of BW information */
+		set_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->cir_bw.bw = bw;
+	}
+}
+
+/**
+ * ice_set_clear_eir_bw - set or clear EIR BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear EIR bandwidth (BW) in the passed param bw_t_info.
+ */
+static void ice_set_clear_eir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+	if (bw == ICE_SCHED_DFLT_BW) {
+		clear_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->eir_bw.bw = 0;
+	} else {
+		/* EIR BW and Shared BW profiles are mutually exclusive and
+		 * hence only one of them may be set for any given element.
+		 * First clear earlier saved shared BW information.
+		 */
+		clear_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+		bw_t_info->shared_bw = 0;
+		/* save EIR BW information */
+		set_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->eir_bw.bw = bw;
+	}
+}
+
+/**
+ * ice_set_clear_shared_bw - set or clear shared BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear shared bandwidth (BW) in the passed param bw_t_info.
+ */
+static void ice_set_clear_shared_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+	if (bw == ICE_SCHED_DFLT_BW) {
+		clear_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+		bw_t_info->shared_bw = 0;
+	} else {
+		/* EIR BW and Shared BW profiles are mutually exclusive and
+		 * hence only one of them may be set for any given element.
+		 * First clear earlier saved EIR BW information.
+		 */
+		clear_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->eir_bw.bw = 0;
+		/* save shared BW information */
+		set_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+		bw_t_info->shared_bw = bw;
+	}
+}
+
+/**
+ * ice_sched_calc_wakeup - calculate RL profile wakeup parameter
+ * @bw: bandwidth in Kbps
+ *
+ * This function calculates the wakeup parameter of RL profile.
+ */
+static u16 ice_sched_calc_wakeup(s32 bw)
+{
+	s64 bytes_per_sec, wakeup_int, wakeup_a, wakeup_b, wakeup_f;
+	s32 wakeup_f_int;
+	u16 wakeup = 0;
+
+	/* Get the wakeup integer value */
+	bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE);
+	wakeup_int = div64_long(ICE_RL_PROF_FREQUENCY, bytes_per_sec);
+	if (wakeup_int > 63) {
+		wakeup = (u16)((1 << 15) | wakeup_int);
+	} else {
+		/* Calculate fraction value up to 4 decimals
+		 * Convert Integer value to a constant multiplier
+		 */
+		wakeup_b = (s64)ICE_RL_PROF_MULTIPLIER * wakeup_int;
+		wakeup_a = div64_long((s64)ICE_RL_PROF_MULTIPLIER *
+					   ICE_RL_PROF_FREQUENCY,
+				      bytes_per_sec);
+
+		/* Get Fraction value */
+		wakeup_f = wakeup_a - wakeup_b;
+
+		/* Round up the Fractional value via Ceil(Fractional value) */
+		if (wakeup_f > div64_long(ICE_RL_PROF_MULTIPLIER, 2))
+			wakeup_f += 1;
+
+		wakeup_f_int = (s32)div64_long(wakeup_f * ICE_RL_PROF_FRACTION,
+					       ICE_RL_PROF_MULTIPLIER);
+		wakeup |= (u16)(wakeup_int << 9);
+		wakeup |= (u16)(0x1ff & wakeup_f_int);
+	}
+
+	return wakeup;
+}
+
+/**
+ * ice_sched_bw_to_rl_profile - convert BW to profile parameters
+ * @bw: bandwidth in Kbps
+ * @profile: profile parameters to return
+ *
+ * This function converts the BW to profile structure format.
+ */
+static enum ice_status
+ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile)
+{
+	enum ice_status status = ICE_ERR_PARAM;
+	s64 bytes_per_sec, ts_rate, mv_tmp;
+	bool found = false;
+	s32 encode = 0;
+	s64 mv = 0;
+	s32 i;
+
+	/* Bw settings range is from 0.5Mb/sec to 100Gb/sec */
+	if (bw < ICE_SCHED_MIN_BW || bw > ICE_SCHED_MAX_BW)
+		return status;
+
+	/* Bytes per second from Kbps */
+	bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE);
+
+	/* encode is 6 bits but really useful are 5 bits */
+	for (i = 0; i < 64; i++) {
+		u64 pow_result = BIT_ULL(i);
+
+		ts_rate = div64_long((s64)ICE_RL_PROF_FREQUENCY,
+				     pow_result * ICE_RL_PROF_TS_MULTIPLIER);
+		if (ts_rate <= 0)
+			continue;
+
+		/* Multiplier value */
+		mv_tmp = div64_long(bytes_per_sec * ICE_RL_PROF_MULTIPLIER,
+				    ts_rate);
+
+		/* Round to the nearest ICE_RL_PROF_MULTIPLIER */
+		mv = round_up_64bit(mv_tmp, ICE_RL_PROF_MULTIPLIER);
+
+		/* First multiplier value greater than the given
+		 * accuracy bytes
+		 */
+		if (mv > ICE_RL_PROF_ACCURACY_BYTES) {
+			encode = i;
+			found = true;
+			break;
+		}
+	}
+	if (found) {
+		u16 wm;
+
+		wm = ice_sched_calc_wakeup(bw);
+		profile->rl_multiply = cpu_to_le16(mv);
+		profile->wake_up_calc = cpu_to_le16(wm);
+		profile->rl_encode = cpu_to_le16(encode);
+		status = 0;
+	} else {
+		status = ICE_ERR_DOES_NOT_EXIST;
+	}
+
+	return status;
+}
+
+/**
+ * ice_sched_add_rl_profile - add RL profile
+ * @pi: port information structure
+ * @rl_type: type of rate limit BW - min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ * @layer_num: specifies in which layer to create profile
+ *
+ * This function first checks the existing list for corresponding BW
+ * parameter. If it exists, it returns the associated profile otherwise
+ * it creates a new rate limit profile for requested BW, and adds it to
+ * the HW DB and local list. It returns the new profile or null on error.
+ * The caller needs to hold the scheduler lock.
+ */
+static struct ice_aqc_rl_profile_info *
+ice_sched_add_rl_profile(struct ice_port_info *pi,
+			 enum ice_rl_type rl_type, u32 bw, u8 layer_num)
+{
+	struct ice_aqc_rl_profile_info *rl_prof_elem;
+	u16 profiles_added = 0, num_profiles = 1;
+	struct ice_aqc_rl_profile_elem *buf;
+	enum ice_status status;
+	struct ice_hw *hw;
+	u8 profile_type;
+
+	if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM)
+		return NULL;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_CIR;
+		break;
+	case ICE_MAX_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_EIR;
+		break;
+	case ICE_SHARED_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_SRL;
+		break;
+	default:
+		return NULL;
+	}
+
+	if (!pi)
+		return NULL;
+	hw = pi->hw;
+	list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num],
+			    list_entry)
+		if ((rl_prof_elem->profile.flags & ICE_AQC_RL_PROFILE_TYPE_M) ==
+		    profile_type && rl_prof_elem->bw == bw)
+			/* Return existing profile ID info */
+			return rl_prof_elem;
+
+	/* Create new profile ID */
+	rl_prof_elem = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*rl_prof_elem),
+				    GFP_KERNEL);
+
+	if (!rl_prof_elem)
+		return NULL;
+
+	status = ice_sched_bw_to_rl_profile(bw, &rl_prof_elem->profile);
+	if (status)
+		goto exit_add_rl_prof;
+
+	rl_prof_elem->bw = bw;
+	/* layer_num is zero relative, and fw expects level from 1 to 9 */
+	rl_prof_elem->profile.level = layer_num + 1;
+	rl_prof_elem->profile.flags = profile_type;
+	rl_prof_elem->profile.max_burst_size = cpu_to_le16(hw->max_burst_size);
+
+	/* Create new entry in HW DB */
+	buf = &rl_prof_elem->profile;
+	status = ice_aq_add_rl_profile(hw, num_profiles, buf, sizeof(*buf),
+				       &profiles_added, NULL);
+	if (status || profiles_added != num_profiles)
+		goto exit_add_rl_prof;
+
+	/* Good entry - add in the list */
+	rl_prof_elem->prof_id_ref = 0;
+	list_add(&rl_prof_elem->list_entry, &pi->rl_prof_list[layer_num]);
+	return rl_prof_elem;
+
+exit_add_rl_prof:
+	devm_kfree(ice_hw_to_dev(hw), rl_prof_elem);
+	return NULL;
+}
+
+/**
+ * ice_sched_cfg_node_bw_lmt - configure node sched params
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @rl_type: rate limit type CIR, EIR, or shared
+ * @rl_prof_id: rate limit profile ID
+ *
+ * This function configures node element's BW limit.
+ */
+static enum ice_status
+ice_sched_cfg_node_bw_lmt(struct ice_hw *hw, struct ice_sched_node *node,
+			  enum ice_rl_type rl_type, u16 rl_prof_id)
+{
+	struct ice_aqc_txsched_elem_data buf;
+	struct ice_aqc_txsched_elem *data;
+
+	buf = node->info;
+	data = &buf.data;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		data->valid_sections |= ICE_AQC_ELEM_VALID_CIR;
+		data->cir_bw.bw_profile_idx = cpu_to_le16(rl_prof_id);
+		break;
+	case ICE_MAX_BW:
+		/* EIR BW and Shared BW profiles are mutually exclusive and
+		 * hence only one of them may be set for any given element
+		 */
+		if (data->valid_sections & ICE_AQC_ELEM_VALID_SHARED)
+			return ICE_ERR_CFG;
+		data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+		data->eir_bw.bw_profile_idx = cpu_to_le16(rl_prof_id);
+		break;
+	case ICE_SHARED_BW:
+		/* Check for removing shared BW */
+		if (rl_prof_id == ICE_SCHED_NO_SHARED_RL_PROF_ID) {
+			/* remove shared profile */
+			data->valid_sections &= ~ICE_AQC_ELEM_VALID_SHARED;
+			data->srl_id = 0; /* clear SRL field */
+
+			/* enable back EIR to default profile */
+			data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+			data->eir_bw.bw_profile_idx =
+				cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+			break;
+		}
+		/* EIR BW and Shared BW profiles are mutually exclusive and
+		 * hence only one of them may be set for any given element
+		 */
+		if ((data->valid_sections & ICE_AQC_ELEM_VALID_EIR) &&
+		    (le16_to_cpu(data->eir_bw.bw_profile_idx) !=
+			    ICE_SCHED_DFLT_RL_PROF_ID))
+			return ICE_ERR_CFG;
+		/* EIR BW is set to default, disable it */
+		data->valid_sections &= ~ICE_AQC_ELEM_VALID_EIR;
+		/* Okay to enable shared BW now */
+		data->valid_sections |= ICE_AQC_ELEM_VALID_SHARED;
+		data->srl_id = cpu_to_le16(rl_prof_id);
+		break;
+	default:
+		/* Unknown rate limit type */
+		return ICE_ERR_PARAM;
+	}
+
+	/* Configure element */
+	return ice_sched_update_elem(hw, node, &buf);
+}
+
+/**
+ * ice_sched_get_node_rl_prof_id - get node's rate limit profile ID
+ * @node: sched node
+ * @rl_type: rate limit type
+ *
+ * If existing profile matches, it returns the corresponding rate
+ * limit profile ID, otherwise it returns an invalid ID as error.
+ */
+static u16
+ice_sched_get_node_rl_prof_id(struct ice_sched_node *node,
+			      enum ice_rl_type rl_type)
+{
+	u16 rl_prof_id = ICE_SCHED_INVAL_PROF_ID;
+	struct ice_aqc_txsched_elem *data;
+
+	data = &node->info.data;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		if (data->valid_sections & ICE_AQC_ELEM_VALID_CIR)
+			rl_prof_id = le16_to_cpu(data->cir_bw.bw_profile_idx);
+		break;
+	case ICE_MAX_BW:
+		if (data->valid_sections & ICE_AQC_ELEM_VALID_EIR)
+			rl_prof_id = le16_to_cpu(data->eir_bw.bw_profile_idx);
+		break;
+	case ICE_SHARED_BW:
+		if (data->valid_sections & ICE_AQC_ELEM_VALID_SHARED)
+			rl_prof_id = le16_to_cpu(data->srl_id);
+		break;
+	default:
+		break;
+	}
+
+	return rl_prof_id;
+}
+
+/**
+ * ice_sched_get_rl_prof_layer - selects rate limit profile creation layer
+ * @pi: port information structure
+ * @rl_type: type of rate limit BW - min, max, or shared
+ * @layer_index: layer index
+ *
+ * This function returns requested profile creation layer.
+ */
+static u8
+ice_sched_get_rl_prof_layer(struct ice_port_info *pi, enum ice_rl_type rl_type,
+			    u8 layer_index)
+{
+	struct ice_hw *hw = pi->hw;
+
+	if (layer_index >= hw->num_tx_sched_layers)
+		return ICE_SCHED_INVAL_LAYER_NUM;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		if (hw->layer_info[layer_index].max_cir_rl_profiles)
+			return layer_index;
+		break;
+	case ICE_MAX_BW:
+		if (hw->layer_info[layer_index].max_eir_rl_profiles)
+			return layer_index;
+		break;
+	case ICE_SHARED_BW:
+		/* if current layer doesn't support SRL profile creation
+		 * then try a layer up or down.
+		 */
+		if (hw->layer_info[layer_index].max_srl_profiles)
+			return layer_index;
+		else if (layer_index < hw->num_tx_sched_layers - 1 &&
+			 hw->layer_info[layer_index + 1].max_srl_profiles)
+			return layer_index + 1;
+		else if (layer_index > 0 &&
+			 hw->layer_info[layer_index - 1].max_srl_profiles)
+			return layer_index - 1;
+		break;
+	default:
+		break;
+	}
+	return ICE_SCHED_INVAL_LAYER_NUM;
+}
+
+/**
+ * ice_sched_get_srl_node - get shared rate limit node
+ * @node: tree node
+ * @srl_layer: shared rate limit layer
+ *
+ * This function returns SRL node to be used for shared rate limit purpose.
+ * The caller needs to hold scheduler lock.
+ */
+static struct ice_sched_node *
+ice_sched_get_srl_node(struct ice_sched_node *node, u8 srl_layer)
+{
+	if (srl_layer > node->tx_sched_layer)
+		return node->children[0];
+	else if (srl_layer < node->tx_sched_layer)
+		/* Node can't be created without a parent. It will always
+		 * have a valid parent except root node.
+		 */
+		return node->parent;
+	else
+		return node;
+}
+
+/**
+ * ice_sched_rm_rl_profile - remove RL profile ID
+ * @pi: port information structure
+ * @layer_num: layer number where profiles are saved
+ * @profile_type: profile type like EIR, CIR, or SRL
+ * @profile_id: profile ID to remove
+ *
+ * This function removes rate limit profile from layer 'layer_num' of type
+ * 'profile_type' and profile ID as 'profile_id'. The caller needs to hold
+ * scheduler lock.
+ */
+static enum ice_status
+ice_sched_rm_rl_profile(struct ice_port_info *pi, u8 layer_num, u8 profile_type,
+			u16 profile_id)
+{
+	struct ice_aqc_rl_profile_info *rl_prof_elem;
+	enum ice_status status = 0;
+
+	if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM)
+		return ICE_ERR_PARAM;
+	/* Check the existing list for RL profile */
+	list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num],
+			    list_entry)
+		if ((rl_prof_elem->profile.flags & ICE_AQC_RL_PROFILE_TYPE_M) ==
+		    profile_type &&
+		    le16_to_cpu(rl_prof_elem->profile.profile_id) ==
+		    profile_id) {
+			if (rl_prof_elem->prof_id_ref)
+				rl_prof_elem->prof_id_ref--;
+
+			/* Remove old profile ID from database */
+			status = ice_sched_del_rl_profile(pi->hw, rl_prof_elem);
+			if (status && status != ICE_ERR_IN_USE)
+				ice_debug(pi->hw, ICE_DBG_SCHED,
+					  "Remove rl profile failed\n");
+			break;
+		}
+	if (status == ICE_ERR_IN_USE)
+		status = 0;
+	return status;
+}
+
+/**
+ * ice_sched_set_node_bw_dflt - set node's bandwidth limit to default
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @rl_type: rate limit type min, max, or shared
+ * @layer_num: layer number where RL profiles are saved
+ *
+ * This function configures node element's BW rate limit profile ID of
+ * type CIR, EIR, or SRL to default. This function needs to be called
+ * with the scheduler lock held.
+ */
+static enum ice_status
+ice_sched_set_node_bw_dflt(struct ice_port_info *pi,
+			   struct ice_sched_node *node,
+			   enum ice_rl_type rl_type, u8 layer_num)
+{
+	enum ice_status status;
+	struct ice_hw *hw;
+	u8 profile_type;
+	u16 rl_prof_id;
+	u16 old_id;
+
+	hw = pi->hw;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_CIR;
+		rl_prof_id = ICE_SCHED_DFLT_RL_PROF_ID;
+		break;
+	case ICE_MAX_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_EIR;
+		rl_prof_id = ICE_SCHED_DFLT_RL_PROF_ID;
+		break;
+	case ICE_SHARED_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_SRL;
+		/* No SRL is configured for default case */
+		rl_prof_id = ICE_SCHED_NO_SHARED_RL_PROF_ID;
+		break;
+	default:
+		return ICE_ERR_PARAM;
+	}
+	/* Save existing RL prof ID for later clean up */
+	old_id = ice_sched_get_node_rl_prof_id(node, rl_type);
+	/* Configure BW scheduling parameters */
+	status = ice_sched_cfg_node_bw_lmt(hw, node, rl_type, rl_prof_id);
+	if (status)
+		return status;
+
+	/* Remove stale RL profile ID */
+	if (old_id == ICE_SCHED_DFLT_RL_PROF_ID ||
+	    old_id == ICE_SCHED_INVAL_PROF_ID)
+		return 0;
+
+	return ice_sched_rm_rl_profile(pi, layer_num, profile_type, old_id);
+}
+
+/**
+ * ice_sched_set_eir_srl_excl - set EIR/SRL exclusiveness
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @layer_num: layer number where rate limit profiles are saved
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth value
+ *
+ * This function prepares node element's bandwidth to SRL or EIR exclusively.
+ * EIR BW and Shared BW profiles are mutually exclusive and hence only one of
+ * them may be set for any given element. This function needs to be called
+ * with the scheduler lock held.
+ */
+static enum ice_status
+ice_sched_set_eir_srl_excl(struct ice_port_info *pi,
+			   struct ice_sched_node *node,
+			   u8 layer_num, enum ice_rl_type rl_type, u32 bw)
+{
+	if (rl_type == ICE_SHARED_BW) {
+		/* SRL node passed in this case, it may be different node */
+		if (bw == ICE_SCHED_DFLT_BW)
+			/* SRL being removed, ice_sched_cfg_node_bw_lmt()
+			 * enables EIR to default. EIR is not set in this
+			 * case, so no additional action is required.
+			 */
+			return 0;
+
+		/* SRL being configured, set EIR to default here.
+		 * ice_sched_cfg_node_bw_lmt() disables EIR when it
+		 * configures SRL
+		 */
+		return ice_sched_set_node_bw_dflt(pi, node, ICE_MAX_BW,
+						  layer_num);
+	} else if (rl_type == ICE_MAX_BW &&
+		   node->info.data.valid_sections & ICE_AQC_ELEM_VALID_SHARED) {
+		/* Remove Shared profile. Set default shared BW call
+		 * removes shared profile for a node.
+		 */
+		return ice_sched_set_node_bw_dflt(pi, node,
+						  ICE_SHARED_BW,
+						  layer_num);
+	}
+	return 0;
+}
+
+/**
+ * ice_sched_set_node_bw - set node's bandwidth
+ * @pi: port information structure
+ * @node: tree node
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ * @layer_num: layer number
+ *
+ * This function adds new profile corresponding to requested BW, configures
+ * node's RL profile ID of type CIR, EIR, or SRL, and removes old profile
+ * ID from local database. The caller needs to hold scheduler lock.
+ */
+static enum ice_status
+ice_sched_set_node_bw(struct ice_port_info *pi, struct ice_sched_node *node,
+		      enum ice_rl_type rl_type, u32 bw, u8 layer_num)
+{
+	struct ice_aqc_rl_profile_info *rl_prof_info;
+	enum ice_status status = ICE_ERR_PARAM;
+	struct ice_hw *hw = pi->hw;
+	u16 old_id, rl_prof_id;
+
+	rl_prof_info = ice_sched_add_rl_profile(pi, rl_type, bw, layer_num);
+	if (!rl_prof_info)
+		return status;
+
+	rl_prof_id = le16_to_cpu(rl_prof_info->profile.profile_id);
+
+	/* Save existing RL prof ID for later clean up */
+	old_id = ice_sched_get_node_rl_prof_id(node, rl_type);
+	/* Configure BW scheduling parameters */
+	status = ice_sched_cfg_node_bw_lmt(hw, node, rl_type, rl_prof_id);
+	if (status)
+		return status;
+
+	/* New changes has been applied */
+	/* Increment the profile ID reference count */
+	rl_prof_info->prof_id_ref++;
+
+	/* Check for old ID removal */
+	if ((old_id == ICE_SCHED_DFLT_RL_PROF_ID && rl_type != ICE_SHARED_BW) ||
+	    old_id == ICE_SCHED_INVAL_PROF_ID || old_id == rl_prof_id)
+		return 0;
+
+	return ice_sched_rm_rl_profile(pi, layer_num,
+				       rl_prof_info->profile.flags &
+				       ICE_AQC_RL_PROFILE_TYPE_M, old_id);
+}
+
+/**
+ * ice_sched_set_node_bw_lmt - set node's BW limit
+ * @pi: port information structure
+ * @node: tree node
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * It updates node's BW limit parameters like BW RL profile ID of type CIR,
+ * EIR, or SRL. The caller needs to hold scheduler lock.
+ */
+static enum ice_status
+ice_sched_set_node_bw_lmt(struct ice_port_info *pi, struct ice_sched_node *node,
+			  enum ice_rl_type rl_type, u32 bw)
+{
+	struct ice_sched_node *cfg_node = node;
+	enum ice_status status;
+
+	struct ice_hw *hw;
+	u8 layer_num;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+	hw = pi->hw;
+	/* Remove unused RL profile IDs from HW and SW DB */
+	ice_sched_rm_unused_rl_prof(pi);
+	layer_num = ice_sched_get_rl_prof_layer(pi, rl_type,
+						node->tx_sched_layer);
+	if (layer_num >= hw->num_tx_sched_layers)
+		return ICE_ERR_PARAM;
+
+	if (rl_type == ICE_SHARED_BW) {
+		/* SRL node may be different */
+		cfg_node = ice_sched_get_srl_node(node, layer_num);
+		if (!cfg_node)
+			return ICE_ERR_CFG;
+	}
+	/* EIR BW and Shared BW profiles are mutually exclusive and
+	 * hence only one of them may be set for any given element
+	 */
+	status = ice_sched_set_eir_srl_excl(pi, cfg_node, layer_num, rl_type,
+					    bw);
+	if (status)
+		return status;
+	if (bw == ICE_SCHED_DFLT_BW)
+		return ice_sched_set_node_bw_dflt(pi, cfg_node, rl_type,
+						  layer_num);
+	return ice_sched_set_node_bw(pi, cfg_node, rl_type, bw, layer_num);
+}
+
+/**
+ * ice_sched_set_node_bw_dflt_lmt - set node's BW limit to default
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @rl_type: rate limit type min, max, or shared
+ *
+ * This function configures node element's BW rate limit profile ID of
+ * type CIR, EIR, or SRL to default. This function needs to be called
+ * with the scheduler lock held.
+ */
+static enum ice_status
+ice_sched_set_node_bw_dflt_lmt(struct ice_port_info *pi,
+			       struct ice_sched_node *node,
+			       enum ice_rl_type rl_type)
+{
+	return ice_sched_set_node_bw_lmt(pi, node, rl_type,
+					 ICE_SCHED_DFLT_BW);
+}
+
+/**
+ * ice_sched_validate_srl_node - Check node for SRL applicability
+ * @node: sched node to configure
+ * @sel_layer: selected SRL layer
+ *
+ * This function checks if the SRL can be applied to a selected layer node on
+ * behalf of the requested node (first argument). This function needs to be
+ * called with scheduler lock held.
+ */
+static enum ice_status
+ice_sched_validate_srl_node(struct ice_sched_node *node, u8 sel_layer)
+{
+	/* SRL profiles are not available on all layers. Check if the
+	 * SRL profile can be applied to a node above or below the
+	 * requested node. SRL configuration is possible only if the
+	 * selected layer's node has single child.
+	 */
+	if (sel_layer == node->tx_sched_layer ||
+	    ((sel_layer == node->tx_sched_layer + 1) &&
+	    node->num_children == 1) ||
+	    ((sel_layer == node->tx_sched_layer - 1) &&
+	    (node->parent && node->parent->num_children == 1)))
+		return 0;
+
+	return ICE_ERR_CFG;
+}
+
+/**
+ * ice_sched_save_q_bw - save queue node's BW information
+ * @q_ctx: queue context structure
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save BW information of queue type node for post replay use.
+ */
+static enum ice_status
+ice_sched_save_q_bw(struct ice_q_ctx *q_ctx, enum ice_rl_type rl_type, u32 bw)
+{
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		ice_set_clear_cir_bw(&q_ctx->bw_t_info, bw);
+		break;
+	case ICE_MAX_BW:
+		ice_set_clear_eir_bw(&q_ctx->bw_t_info, bw);
+		break;
+	case ICE_SHARED_BW:
+		ice_set_clear_shared_bw(&q_ctx->bw_t_info, bw);
+		break;
+	default:
+		return ICE_ERR_PARAM;
+	}
+	return 0;
+}
+
+/**
+ * ice_sched_set_q_bw_lmt - sets queue BW limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ * @bw: bandwidth in Kbps
+ *
+ * This function sets BW limit of queue scheduling node.
+ */
+static enum ice_status
+ice_sched_set_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		       u16 q_handle, enum ice_rl_type rl_type, u32 bw)
+{
+	enum ice_status status = ICE_ERR_PARAM;
+	struct ice_sched_node *node;
+	struct ice_q_ctx *q_ctx;
+
+	if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+		return ICE_ERR_PARAM;
+	mutex_lock(&pi->sched_lock);
+	q_ctx = ice_get_lan_q_ctx(pi->hw, vsi_handle, tc, q_handle);
+	if (!q_ctx)
+		goto exit_q_bw_lmt;
+	node = ice_sched_find_node_by_teid(pi->root, q_ctx->q_teid);
+	if (!node) {
+		ice_debug(pi->hw, ICE_DBG_SCHED, "Wrong q_teid\n");
+		goto exit_q_bw_lmt;
+	}
+
+	/* Return error if it is not a leaf node */
+	if (node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF)
+		goto exit_q_bw_lmt;
+
+	/* SRL bandwidth layer selection */
+	if (rl_type == ICE_SHARED_BW) {
+		u8 sel_layer; /* selected layer */
+
+		sel_layer = ice_sched_get_rl_prof_layer(pi, rl_type,
+							node->tx_sched_layer);
+		if (sel_layer >= pi->hw->num_tx_sched_layers) {
+			status = ICE_ERR_PARAM;
+			goto exit_q_bw_lmt;
+		}
+		status = ice_sched_validate_srl_node(node, sel_layer);
+		if (status)
+			goto exit_q_bw_lmt;
+	}
+
+	if (bw == ICE_SCHED_DFLT_BW)
+		status = ice_sched_set_node_bw_dflt_lmt(pi, node, rl_type);
+	else
+		status = ice_sched_set_node_bw_lmt(pi, node, rl_type, bw);
+
+	if (!status)
+		status = ice_sched_save_q_bw(q_ctx, rl_type, bw);
+
+exit_q_bw_lmt:
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
+
+/**
+ * ice_cfg_q_bw_lmt - configure queue BW limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ * @bw: bandwidth in Kbps
+ *
+ * This function configures BW limit of queue scheduling node.
+ */
+enum ice_status
+ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		 u16 q_handle, enum ice_rl_type rl_type, u32 bw)
+{
+	return ice_sched_set_q_bw_lmt(pi, vsi_handle, tc, q_handle, rl_type,
+				      bw);
+}
+
+/**
+ * ice_cfg_q_bw_dflt_lmt - configure queue BW default limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ *
+ * This function configures BW default limit of queue scheduling node.
+ */
+enum ice_status
+ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		      u16 q_handle, enum ice_rl_type rl_type)
+{
+	return ice_sched_set_q_bw_lmt(pi, vsi_handle, tc, q_handle, rl_type,
+				      ICE_SCHED_DFLT_BW);
+}
+
+/**
+ * ice_cfg_rl_burst_size - Set burst size value
+ * @hw: pointer to the HW struct
+ * @bytes: burst size in bytes
+ *
+ * This function configures/set the burst size to requested new value. The new
+ * burst size value is used for future rate limit calls. It doesn't change the
+ * existing or previously created RL profiles.
+ */
+enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes)
+{
+	u16 burst_size_to_prog;
+
+	if (bytes < ICE_MIN_BURST_SIZE_ALLOWED ||
+	    bytes > ICE_MAX_BURST_SIZE_ALLOWED)
+		return ICE_ERR_PARAM;
+	if (ice_round_to_num(bytes, 64) <=
+	    ICE_MAX_BURST_SIZE_64_BYTE_GRANULARITY) {
+		/* 64 byte granularity case */
+		/* Disable MSB granularity bit */
+		burst_size_to_prog = ICE_64_BYTE_GRANULARITY;
+		/* round number to nearest 64 byte granularity */
+		bytes = ice_round_to_num(bytes, 64);
+		/* The value is in 64 byte chunks */
+		burst_size_to_prog |= (u16)(bytes / 64);
+	} else {
+		/* k bytes granularity case */
+		/* Enable MSB granularity bit */
+		burst_size_to_prog = ICE_KBYTE_GRANULARITY;
+		/* round number to nearest 1024 granularity */
+		bytes = ice_round_to_num(bytes, 1024);
+		/* check rounding doesn't go beyond allowed */
+		if (bytes > ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY)
+			bytes = ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY;
+		/* The value is in k bytes */
+		burst_size_to_prog |= (u16)(bytes / 1024);
+	}
+	hw->max_burst_size = burst_size_to_prog;
+	return 0;
+}
+
+/**
+ * ice_sched_replay_node_prio - re-configure node priority
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @priority: priority value
+ *
+ * This function configures node element's priority value. It
+ * needs to be called with scheduler lock held.
+ */
+static enum ice_status
+ice_sched_replay_node_prio(struct ice_hw *hw, struct ice_sched_node *node,
+			   u8 priority)
+{
+	struct ice_aqc_txsched_elem_data buf;
+	struct ice_aqc_txsched_elem *data;
+	enum ice_status status;
+
+	buf = node->info;
+	data = &buf.data;
+	data->valid_sections |= ICE_AQC_ELEM_VALID_GENERIC;
+	data->generic = priority;
+
+	/* Configure element */
+	status = ice_sched_update_elem(hw, node, &buf);
+	return status;
+}
+
+/**
+ * ice_sched_replay_node_bw - replay node(s) BW
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @bw_t_info: BW type information
+ *
+ * This function restores node's BW from bw_t_info. The caller needs
+ * to hold the scheduler lock.
+ */
+static enum ice_status
+ice_sched_replay_node_bw(struct ice_hw *hw, struct ice_sched_node *node,
+			 struct ice_bw_type_info *bw_t_info)
+{
+	struct ice_port_info *pi = hw->port_info;
+	enum ice_status status = ICE_ERR_PARAM;
+	u16 bw_alloc;
+
+	if (!node)
+		return status;
+	if (bitmap_empty(bw_t_info->bw_t_bitmap, ICE_BW_TYPE_CNT))
+		return 0;
+	if (test_bit(ICE_BW_TYPE_PRIO, bw_t_info->bw_t_bitmap)) {
+		status = ice_sched_replay_node_prio(hw, node,
+						    bw_t_info->generic);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap)) {
+		status = ice_sched_set_node_bw_lmt(pi, node, ICE_MIN_BW,
+						   bw_t_info->cir_bw.bw);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_CIR_WT, bw_t_info->bw_t_bitmap)) {
+		bw_alloc = bw_t_info->cir_bw.bw_alloc;
+		status = ice_sched_cfg_node_bw_alloc(hw, node, ICE_MIN_BW,
+						     bw_alloc);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap)) {
+		status = ice_sched_set_node_bw_lmt(pi, node, ICE_MAX_BW,
+						   bw_t_info->eir_bw.bw);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_EIR_WT, bw_t_info->bw_t_bitmap)) {
+		bw_alloc = bw_t_info->eir_bw.bw_alloc;
+		status = ice_sched_cfg_node_bw_alloc(hw, node, ICE_MAX_BW,
+						     bw_alloc);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap))
+		status = ice_sched_set_node_bw_lmt(pi, node, ICE_SHARED_BW,
+						   bw_t_info->shared_bw);
+	return status;
+}
+
+/**
+ * ice_sched_replay_q_bw - replay queue type node BW
+ * @pi: port information structure
+ * @q_ctx: queue context structure
+ *
+ * This function replays queue type node bandwidth. This function needs to be
+ * called with scheduler lock held.
+ */
+enum ice_status
+ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx)
+{
+	struct ice_sched_node *q_node;
+
+	/* Following also checks the presence of node in tree */
+	q_node = ice_sched_find_node_by_teid(pi->root, q_ctx->q_teid);
+	if (!q_node)
+		return ICE_ERR_PARAM;
+	return ice_sched_replay_node_bw(pi->hw, q_node, &q_ctx->bw_t_info);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
new file mode 100644
index 000000000..0e55ae0d4
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sched.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_SCHED_H_
+#define _ICE_SCHED_H_
+
+#include "ice_common.h"
+
+#define ICE_QGRP_LAYER_OFFSET	2
+#define ICE_VSI_LAYER_OFFSET	4
+#define ICE_SCHED_INVAL_LAYER_NUM	0xFF
+/* Burst size is a 12 bits register that is configured while creating the RL
+ * profile(s). MSB is a granularity bit and tells the granularity type
+ * 0 - LSB bits are in 64 bytes granularity
+ * 1 - LSB bits are in 1K bytes granularity
+ */
+#define ICE_64_BYTE_GRANULARITY			0
+#define ICE_KBYTE_GRANULARITY			BIT(11)
+#define ICE_MIN_BURST_SIZE_ALLOWED		64 /* In Bytes */
+#define ICE_MAX_BURST_SIZE_ALLOWED \
+	((BIT(11) - 1) * 1024) /* In Bytes */
+#define ICE_MAX_BURST_SIZE_64_BYTE_GRANULARITY \
+	((BIT(11) - 1) * 64) /* In Bytes */
+#define ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY	ICE_MAX_BURST_SIZE_ALLOWED
+
+#define ICE_RL_PROF_FREQUENCY 446000000
+#define ICE_RL_PROF_ACCURACY_BYTES 128
+#define ICE_RL_PROF_MULTIPLIER 10000
+#define ICE_RL_PROF_TS_MULTIPLIER 32
+#define ICE_RL_PROF_FRACTION 512
+
+/* BW rate limit profile parameters list entry along
+ * with bandwidth maintained per layer in port info
+ */
+struct ice_aqc_rl_profile_info {
+	struct ice_aqc_rl_profile_elem profile;
+	struct list_head list_entry;
+	u32 bw;			/* requested */
+	u16 prof_id_ref;	/* profile ID to node association ref count */
+};
+
+struct ice_sched_agg_vsi_info {
+	struct list_head list_entry;
+	DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
+	u16 vsi_handle;
+};
+
+struct ice_sched_agg_info {
+	struct list_head agg_vsi_list;
+	struct list_head list_entry;
+	DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
+	u32 agg_id;
+	enum ice_agg_type agg_type;
+};
+
+/* FW AQ command calls */
+enum ice_status
+ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
+			 struct ice_aqc_txsched_elem_data *buf, u16 buf_size,
+			 u16 *elems_ret, struct ice_sq_cd *cd);
+enum ice_status ice_sched_init_port(struct ice_port_info *pi);
+enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw);
+void ice_sched_clear_port(struct ice_port_info *pi);
+void ice_sched_cleanup_all(struct ice_hw *hw);
+void ice_sched_clear_agg(struct ice_hw *hw);
+
+struct ice_sched_node *
+ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid);
+enum ice_status
+ice_sched_add_node(struct ice_port_info *pi, u8 layer,
+		   struct ice_aqc_txsched_elem_data *info);
+void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node);
+struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc);
+struct ice_sched_node *
+ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+			   u8 owner);
+enum ice_status
+ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
+		  u8 owner, bool enable);
+enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle);
+enum ice_status
+ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		 u16 q_handle, enum ice_rl_type rl_type, u32 bw);
+enum ice_status
+ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		      u16 q_handle, enum ice_rl_type rl_type);
+enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes);
+enum ice_status
+ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx);
+#endif /* _ICE_SCHED_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
new file mode 100644
index 000000000..554f56747
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_adminq_cmd.h"
+#include "ice_sriov.h"
+
+/**
+ * ice_aq_send_msg_to_vf
+ * @hw: pointer to the hardware structure
+ * @vfid: VF ID to send msg
+ * @v_opcode: opcodes for VF-PF communication
+ * @v_retval: return error code
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @cd: pointer to command details
+ *
+ * Send message to VF driver (0x0802) using mailbox
+ * queue and asynchronously sending message via
+ * ice_sq_send_cmd() function
+ */
+enum ice_status
+ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval,
+		      u8 *msg, u16 msglen, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_pf_vf_msg *cmd;
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_mbx_opc_send_msg_to_vf);
+
+	cmd = &desc.params.virt;
+	cmd->id = cpu_to_le32(vfid);
+
+	desc.cookie_high = cpu_to_le32(v_opcode);
+	desc.cookie_low = cpu_to_le32(v_retval);
+
+	if (msglen)
+		desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	return ice_sq_send_cmd(hw, &hw->mailboxq, &desc, msg, msglen, cd);
+}
+
+/**
+ * ice_conv_link_speed_to_virtchnl
+ * @adv_link_support: determines the format of the returned link speed
+ * @link_speed: variable containing the link_speed to be converted
+ *
+ * Convert link speed supported by HW to link speed supported by virtchnl.
+ * If adv_link_support is true, then return link speed in Mbps. Else return
+ * link speed as a VIRTCHNL_LINK_SPEED_* casted to a u32. Note that the caller
+ * needs to cast back to an enum virtchnl_link_speed in the case where
+ * adv_link_support is false, but when adv_link_support is true the caller can
+ * expect the speed in Mbps.
+ */
+u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed)
+{
+	u32 speed;
+
+	if (adv_link_support)
+		switch (link_speed) {
+		case ICE_AQ_LINK_SPEED_10MB:
+			speed = ICE_LINK_SPEED_10MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_100MB:
+			speed = ICE_LINK_SPEED_100MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_1000MB:
+			speed = ICE_LINK_SPEED_1000MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_2500MB:
+			speed = ICE_LINK_SPEED_2500MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_5GB:
+			speed = ICE_LINK_SPEED_5000MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_10GB:
+			speed = ICE_LINK_SPEED_10000MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_20GB:
+			speed = ICE_LINK_SPEED_20000MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_25GB:
+			speed = ICE_LINK_SPEED_25000MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_40GB:
+			speed = ICE_LINK_SPEED_40000MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_50GB:
+			speed = ICE_LINK_SPEED_50000MBPS;
+			break;
+		case ICE_AQ_LINK_SPEED_100GB:
+			speed = ICE_LINK_SPEED_100000MBPS;
+			break;
+		default:
+			speed = ICE_LINK_SPEED_UNKNOWN;
+			break;
+		}
+	else
+		/* Virtchnl speeds are not defined for every speed supported in
+		 * the hardware. To maintain compatibility with older AVF
+		 * drivers, while reporting the speed the new speed values are
+		 * resolved to the closest known virtchnl speeds
+		 */
+		switch (link_speed) {
+		case ICE_AQ_LINK_SPEED_10MB:
+		case ICE_AQ_LINK_SPEED_100MB:
+			speed = (u32)VIRTCHNL_LINK_SPEED_100MB;
+			break;
+		case ICE_AQ_LINK_SPEED_1000MB:
+		case ICE_AQ_LINK_SPEED_2500MB:
+		case ICE_AQ_LINK_SPEED_5GB:
+			speed = (u32)VIRTCHNL_LINK_SPEED_1GB;
+			break;
+		case ICE_AQ_LINK_SPEED_10GB:
+			speed = (u32)VIRTCHNL_LINK_SPEED_10GB;
+			break;
+		case ICE_AQ_LINK_SPEED_20GB:
+			speed = (u32)VIRTCHNL_LINK_SPEED_20GB;
+			break;
+		case ICE_AQ_LINK_SPEED_25GB:
+			speed = (u32)VIRTCHNL_LINK_SPEED_25GB;
+			break;
+		case ICE_AQ_LINK_SPEED_40GB:
+		case ICE_AQ_LINK_SPEED_50GB:
+		case ICE_AQ_LINK_SPEED_100GB:
+			speed = (u32)VIRTCHNL_LINK_SPEED_40GB;
+			break;
+		default:
+			speed = (u32)VIRTCHNL_LINK_SPEED_UNKNOWN;
+			break;
+		}
+
+	return speed;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h
new file mode 100644
index 000000000..3d78a0795
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_SRIOV_H_
+#define _ICE_SRIOV_H_
+
+#include "ice_common.h"
+
+#ifdef CONFIG_PCI_IOV
+enum ice_status
+ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval,
+		      u8 *msg, u16 msglen, struct ice_sq_cd *cd);
+
+u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed);
+#else /* CONFIG_PCI_IOV */
+static inline enum ice_status
+ice_aq_send_msg_to_vf(struct ice_hw __always_unused *hw,
+		      u16 __always_unused vfid, u32 __always_unused v_opcode,
+		      u32 __always_unused v_retval, u8 __always_unused *msg,
+		      u16 __always_unused msglen,
+		      struct ice_sq_cd __always_unused *cd)
+{
+	return 0;
+}
+
+static inline u32
+ice_conv_link_speed_to_virtchnl(bool __always_unused adv_link_support,
+				u16 __always_unused link_speed)
+{
+	return 0;
+}
+
+#endif /* CONFIG_PCI_IOV */
+#endif /* _ICE_SRIOV_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h
new file mode 100644
index 000000000..4028c6365
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_status.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_STATUS_H_
+#define _ICE_STATUS_H_
+
+/* Error Codes */
+enum ice_status {
+	ICE_SUCCESS				= 0,
+
+	/* Generic codes : Range -1..-49 */
+	ICE_ERR_PARAM				= -1,
+	ICE_ERR_NOT_IMPL			= -2,
+	ICE_ERR_NOT_READY			= -3,
+	ICE_ERR_NOT_SUPPORTED			= -4,
+	ICE_ERR_BAD_PTR				= -5,
+	ICE_ERR_INVAL_SIZE			= -6,
+	ICE_ERR_DEVICE_NOT_SUPPORTED		= -8,
+	ICE_ERR_RESET_FAILED			= -9,
+	ICE_ERR_FW_API_VER			= -10,
+	ICE_ERR_NO_MEMORY			= -11,
+	ICE_ERR_CFG				= -12,
+	ICE_ERR_OUT_OF_RANGE			= -13,
+	ICE_ERR_ALREADY_EXISTS			= -14,
+	ICE_ERR_DOES_NOT_EXIST			= -15,
+	ICE_ERR_IN_USE				= -16,
+	ICE_ERR_MAX_LIMIT			= -17,
+	ICE_ERR_RESET_ONGOING			= -18,
+	ICE_ERR_HW_TABLE			= -19,
+	ICE_ERR_FW_DDP_MISMATCH			= -20,
+
+	ICE_ERR_NVM_CHECKSUM			= -51,
+	ICE_ERR_BUF_TOO_SHORT			= -52,
+	ICE_ERR_NVM_BLANK_MODE			= -53,
+	ICE_ERR_AQ_ERROR			= -100,
+	ICE_ERR_AQ_TIMEOUT			= -101,
+	ICE_ERR_AQ_FULL				= -102,
+	ICE_ERR_AQ_NO_WORK			= -103,
+	ICE_ERR_AQ_EMPTY			= -104,
+	ICE_ERR_AQ_FW_CRITICAL			= -105,
+};
+
+#endif /* _ICE_STATUS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
new file mode 100644
index 000000000..0155c45d9
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -0,0 +1,2856 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_switch.h"
+
+#define ICE_ETH_DA_OFFSET		0
+#define ICE_ETH_ETHTYPE_OFFSET		12
+#define ICE_ETH_VLAN_TCI_OFFSET		14
+#define ICE_MAX_VLAN_ID			0xFFF
+
+/* Dummy ethernet header needed in the ice_aqc_sw_rules_elem
+ * struct to configure any switch filter rules.
+ * {DA (6 bytes), SA(6 bytes),
+ * Ether type (2 bytes for header without VLAN tag) OR
+ * VLAN tag (4 bytes for header with VLAN tag) }
+ *
+ * Word on Hardcoded values
+ * byte 0 = 0x2: to identify it as locally administered DA MAC
+ * byte 6 = 0x2: to identify it as locally administered SA MAC
+ * byte 12 = 0x81 & byte 13 = 0x00:
+ *	In case of VLAN filter first two bytes defines ether type (0x8100)
+ *	and remaining two bytes are placeholder for programming a given VLAN ID
+ *	In case of Ether type filter it is treated as header without VLAN tag
+ *	and byte 12 and 13 is used to program a given Ether type instead
+ */
+#define DUMMY_ETH_HDR_LEN		16
+static const u8 dummy_eth_header[DUMMY_ETH_HDR_LEN] = { 0x2, 0, 0, 0, 0, 0,
+							0x2, 0, 0, 0, 0, 0,
+							0x81, 0, 0, 0};
+
+#define ICE_SW_RULE_RX_TX_ETH_HDR_SIZE \
+	(offsetof(struct ice_aqc_sw_rules_elem, pdata.lkup_tx_rx.hdr) + \
+	 (DUMMY_ETH_HDR_LEN * \
+	  sizeof(((struct ice_sw_rule_lkup_rx_tx *)0)->hdr[0])))
+#define ICE_SW_RULE_RX_TX_NO_HDR_SIZE \
+	(offsetof(struct ice_aqc_sw_rules_elem, pdata.lkup_tx_rx.hdr))
+#define ICE_SW_RULE_LG_ACT_SIZE(n) \
+	(offsetof(struct ice_aqc_sw_rules_elem, pdata.lg_act.act) + \
+	 ((n) * sizeof(((struct ice_sw_rule_lg_act *)0)->act[0])))
+#define ICE_SW_RULE_VSI_LIST_SIZE(n) \
+	(offsetof(struct ice_aqc_sw_rules_elem, pdata.vsi_list.vsi) + \
+	 ((n) * sizeof(((struct ice_sw_rule_vsi_list *)0)->vsi[0])))
+
+/**
+ * ice_init_def_sw_recp - initialize the recipe book keeping tables
+ * @hw: pointer to the HW struct
+ *
+ * Allocate memory for the entire recipe table and initialize the structures/
+ * entries corresponding to basic recipes.
+ */
+enum ice_status ice_init_def_sw_recp(struct ice_hw *hw)
+{
+	struct ice_sw_recipe *recps;
+	u8 i;
+
+	recps = devm_kcalloc(ice_hw_to_dev(hw), ICE_MAX_NUM_RECIPES,
+			     sizeof(*recps), GFP_KERNEL);
+	if (!recps)
+		return ICE_ERR_NO_MEMORY;
+
+	for (i = 0; i < ICE_SW_LKUP_LAST; i++) {
+		recps[i].root_rid = i;
+		INIT_LIST_HEAD(&recps[i].filt_rules);
+		INIT_LIST_HEAD(&recps[i].filt_replay_rules);
+		mutex_init(&recps[i].filt_rule_lock);
+	}
+
+	hw->switch_info->recp_list = recps;
+
+	return 0;
+}
+
+/**
+ * ice_aq_get_sw_cfg - get switch configuration
+ * @hw: pointer to the hardware structure
+ * @buf: pointer to the result buffer
+ * @buf_size: length of the buffer available for response
+ * @req_desc: pointer to requested descriptor
+ * @num_elems: pointer to number of elements
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get switch configuration (0x0200) to be placed in buf.
+ * This admin command returns information such as initial VSI/port number
+ * and switch ID it belongs to.
+ *
+ * NOTE: *req_desc is both an input/output parameter.
+ * The caller of this function first calls this function with *request_desc set
+ * to 0. If the response from f/w has *req_desc set to 0, all the switch
+ * configuration information has been returned; if non-zero (meaning not all
+ * the information was returned), the caller should call this function again
+ * with *req_desc set to the previous value returned by f/w to get the
+ * next block of switch configuration information.
+ *
+ * *num_elems is output only parameter. This reflects the number of elements
+ * in response buffer. The caller of this function to use *num_elems while
+ * parsing the response buffer.
+ */
+static enum ice_status
+ice_aq_get_sw_cfg(struct ice_hw *hw, struct ice_aqc_get_sw_cfg_resp_elem *buf,
+		  u16 buf_size, u16 *req_desc, u16 *num_elems,
+		  struct ice_sq_cd *cd)
+{
+	struct ice_aqc_get_sw_cfg *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_sw_cfg);
+	cmd = &desc.params.get_sw_conf;
+	cmd->element = cpu_to_le16(*req_desc);
+
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status) {
+		*req_desc = le16_to_cpu(cmd->element);
+		*num_elems = le16_to_cpu(cmd->num_elems);
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_add_vsi
+ * @hw: pointer to the HW struct
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add a VSI context to the hardware (0x0210)
+ */
+static enum ice_status
+ice_aq_add_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+	       struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_update_free_vsi_resp *res;
+	struct ice_aqc_add_get_update_free_vsi *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.vsi_cmd;
+	res = &desc.params.add_update_free_vsi_res;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_vsi);
+
+	if (!vsi_ctx->alloc_from_pool)
+		cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num |
+					   ICE_AQ_VSI_IS_VALID);
+	cmd->vf_id = vsi_ctx->vf_num;
+
+	cmd->vsi_flags = cpu_to_le16(vsi_ctx->flags);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	status = ice_aq_send_cmd(hw, &desc, &vsi_ctx->info,
+				 sizeof(vsi_ctx->info), cd);
+
+	if (!status) {
+		vsi_ctx->vsi_num = le16_to_cpu(res->vsi_num) & ICE_AQ_VSI_NUM_M;
+		vsi_ctx->vsis_allocd = le16_to_cpu(res->vsi_used);
+		vsi_ctx->vsis_unallocated = le16_to_cpu(res->vsi_free);
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_free_vsi
+ * @hw: pointer to the HW struct
+ * @vsi_ctx: pointer to a VSI context struct
+ * @keep_vsi_alloc: keep VSI allocation as part of this PF's resources
+ * @cd: pointer to command details structure or NULL
+ *
+ * Free VSI context info from hardware (0x0213)
+ */
+static enum ice_status
+ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+		bool keep_vsi_alloc, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_update_free_vsi_resp *resp;
+	struct ice_aqc_add_get_update_free_vsi *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.vsi_cmd;
+	resp = &desc.params.add_update_free_vsi_res;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_free_vsi);
+
+	cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID);
+	if (keep_vsi_alloc)
+		cmd->cmd_flags = cpu_to_le16(ICE_AQ_VSI_KEEP_ALLOC);
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+	if (!status) {
+		vsi_ctx->vsis_allocd = le16_to_cpu(resp->vsi_used);
+		vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_update_vsi
+ * @hw: pointer to the HW struct
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Update VSI context in the hardware (0x0211)
+ */
+static enum ice_status
+ice_aq_update_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+		  struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_update_free_vsi_resp *resp;
+	struct ice_aqc_add_get_update_free_vsi *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.vsi_cmd;
+	resp = &desc.params.add_update_free_vsi_res;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_update_vsi);
+
+	cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	status = ice_aq_send_cmd(hw, &desc, &vsi_ctx->info,
+				 sizeof(vsi_ctx->info), cd);
+
+	if (!status) {
+		vsi_ctx->vsis_allocd = le16_to_cpu(resp->vsi_used);
+		vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
+	}
+
+	return status;
+}
+
+/**
+ * ice_is_vsi_valid - check whether the VSI is valid or not
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ *
+ * check whether the VSI is valid or not
+ */
+bool ice_is_vsi_valid(struct ice_hw *hw, u16 vsi_handle)
+{
+	return vsi_handle < ICE_MAX_VSI && hw->vsi_ctx[vsi_handle];
+}
+
+/**
+ * ice_get_hw_vsi_num - return the HW VSI number
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ *
+ * return the HW VSI number
+ * Caution: call this function only if VSI is valid (ice_is_vsi_valid)
+ */
+u16 ice_get_hw_vsi_num(struct ice_hw *hw, u16 vsi_handle)
+{
+	return hw->vsi_ctx[vsi_handle]->vsi_num;
+}
+
+/**
+ * ice_get_vsi_ctx - return the VSI context entry for a given VSI handle
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ *
+ * return the VSI context entry for a given VSI handle
+ */
+struct ice_vsi_ctx *ice_get_vsi_ctx(struct ice_hw *hw, u16 vsi_handle)
+{
+	return (vsi_handle >= ICE_MAX_VSI) ? NULL : hw->vsi_ctx[vsi_handle];
+}
+
+/**
+ * ice_save_vsi_ctx - save the VSI context for a given VSI handle
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ * @vsi: VSI context pointer
+ *
+ * save the VSI context entry for a given VSI handle
+ */
+static void
+ice_save_vsi_ctx(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi)
+{
+	hw->vsi_ctx[vsi_handle] = vsi;
+}
+
+/**
+ * ice_clear_vsi_q_ctx - clear VSI queue contexts for all TCs
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ */
+static void ice_clear_vsi_q_ctx(struct ice_hw *hw, u16 vsi_handle)
+{
+	struct ice_vsi_ctx *vsi;
+	u8 i;
+
+	vsi = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!vsi)
+		return;
+	ice_for_each_traffic_class(i) {
+		if (vsi->lan_q_ctx[i]) {
+			devm_kfree(ice_hw_to_dev(hw), vsi->lan_q_ctx[i]);
+			vsi->lan_q_ctx[i] = NULL;
+		}
+	}
+}
+
+/**
+ * ice_clear_vsi_ctx - clear the VSI context entry
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ *
+ * clear the VSI context entry
+ */
+static void ice_clear_vsi_ctx(struct ice_hw *hw, u16 vsi_handle)
+{
+	struct ice_vsi_ctx *vsi;
+
+	vsi = ice_get_vsi_ctx(hw, vsi_handle);
+	if (vsi) {
+		ice_clear_vsi_q_ctx(hw, vsi_handle);
+		devm_kfree(ice_hw_to_dev(hw), vsi);
+		hw->vsi_ctx[vsi_handle] = NULL;
+	}
+}
+
+/**
+ * ice_clear_all_vsi_ctx - clear all the VSI context entries
+ * @hw: pointer to the HW struct
+ */
+void ice_clear_all_vsi_ctx(struct ice_hw *hw)
+{
+	u16 i;
+
+	for (i = 0; i < ICE_MAX_VSI; i++)
+		ice_clear_vsi_ctx(hw, i);
+}
+
+/**
+ * ice_add_vsi - add VSI context to the hardware and VSI handle list
+ * @hw: pointer to the HW struct
+ * @vsi_handle: unique VSI handle provided by drivers
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add a VSI context to the hardware also add it into the VSI handle list.
+ * If this function gets called after reset for existing VSIs then update
+ * with the new HW VSI number in the corresponding VSI handle list entry.
+ */
+enum ice_status
+ice_add_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+	    struct ice_sq_cd *cd)
+{
+	struct ice_vsi_ctx *tmp_vsi_ctx;
+	enum ice_status status;
+
+	if (vsi_handle >= ICE_MAX_VSI)
+		return ICE_ERR_PARAM;
+	status = ice_aq_add_vsi(hw, vsi_ctx, cd);
+	if (status)
+		return status;
+	tmp_vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!tmp_vsi_ctx) {
+		/* Create a new VSI context */
+		tmp_vsi_ctx = devm_kzalloc(ice_hw_to_dev(hw),
+					   sizeof(*tmp_vsi_ctx), GFP_KERNEL);
+		if (!tmp_vsi_ctx) {
+			ice_aq_free_vsi(hw, vsi_ctx, false, cd);
+			return ICE_ERR_NO_MEMORY;
+		}
+		*tmp_vsi_ctx = *vsi_ctx;
+		ice_save_vsi_ctx(hw, vsi_handle, tmp_vsi_ctx);
+	} else {
+		/* update with new HW VSI num */
+		tmp_vsi_ctx->vsi_num = vsi_ctx->vsi_num;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_free_vsi- free VSI context from hardware and VSI handle list
+ * @hw: pointer to the HW struct
+ * @vsi_handle: unique VSI handle
+ * @vsi_ctx: pointer to a VSI context struct
+ * @keep_vsi_alloc: keep VSI allocation as part of this PF's resources
+ * @cd: pointer to command details structure or NULL
+ *
+ * Free VSI context info from hardware as well as from VSI handle list
+ */
+enum ice_status
+ice_free_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+	     bool keep_vsi_alloc, struct ice_sq_cd *cd)
+{
+	enum ice_status status;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+	vsi_ctx->vsi_num = ice_get_hw_vsi_num(hw, vsi_handle);
+	status = ice_aq_free_vsi(hw, vsi_ctx, keep_vsi_alloc, cd);
+	if (!status)
+		ice_clear_vsi_ctx(hw, vsi_handle);
+	return status;
+}
+
+/**
+ * ice_update_vsi
+ * @hw: pointer to the HW struct
+ * @vsi_handle: unique VSI handle
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Update VSI context in the hardware
+ */
+enum ice_status
+ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+	       struct ice_sq_cd *cd)
+{
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+	vsi_ctx->vsi_num = ice_get_hw_vsi_num(hw, vsi_handle);
+	return ice_aq_update_vsi(hw, vsi_ctx, cd);
+}
+
+/**
+ * ice_aq_alloc_free_vsi_list
+ * @hw: pointer to the HW struct
+ * @vsi_list_id: VSI list ID returned or used for lookup
+ * @lkup_type: switch rule filter lookup type
+ * @opc: switch rules population command type - pass in the command opcode
+ *
+ * allocates or free a VSI list resource
+ */
+static enum ice_status
+ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id,
+			   enum ice_sw_lkup_type lkup_type,
+			   enum ice_adminq_opc opc)
+{
+	struct ice_aqc_alloc_free_res_elem *sw_buf;
+	struct ice_aqc_res_elem *vsi_ele;
+	enum ice_status status;
+	u16 buf_len;
+
+	buf_len = struct_size(sw_buf, elem, 1);
+	sw_buf = devm_kzalloc(ice_hw_to_dev(hw), buf_len, GFP_KERNEL);
+	if (!sw_buf)
+		return ICE_ERR_NO_MEMORY;
+	sw_buf->num_elems = cpu_to_le16(1);
+
+	if (lkup_type == ICE_SW_LKUP_MAC ||
+	    lkup_type == ICE_SW_LKUP_MAC_VLAN ||
+	    lkup_type == ICE_SW_LKUP_ETHERTYPE ||
+	    lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC ||
+	    lkup_type == ICE_SW_LKUP_PROMISC ||
+	    lkup_type == ICE_SW_LKUP_PROMISC_VLAN) {
+		sw_buf->res_type = cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_REP);
+	} else if (lkup_type == ICE_SW_LKUP_VLAN) {
+		sw_buf->res_type =
+			cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_PRUNE);
+	} else {
+		status = ICE_ERR_PARAM;
+		goto ice_aq_alloc_free_vsi_list_exit;
+	}
+
+	if (opc == ice_aqc_opc_free_res)
+		sw_buf->elem[0].e.sw_resp = cpu_to_le16(*vsi_list_id);
+
+	status = ice_aq_alloc_free_res(hw, 1, sw_buf, buf_len, opc, NULL);
+	if (status)
+		goto ice_aq_alloc_free_vsi_list_exit;
+
+	if (opc == ice_aqc_opc_alloc_res) {
+		vsi_ele = &sw_buf->elem[0];
+		*vsi_list_id = le16_to_cpu(vsi_ele->e.sw_resp);
+	}
+
+ice_aq_alloc_free_vsi_list_exit:
+	devm_kfree(ice_hw_to_dev(hw), sw_buf);
+	return status;
+}
+
+/**
+ * ice_aq_sw_rules - add/update/remove switch rules
+ * @hw: pointer to the HW struct
+ * @rule_list: pointer to switch rule population list
+ * @rule_list_sz: total size of the rule list in bytes
+ * @num_rules: number of switch rules in the rule_list
+ * @opc: switch rules population command type - pass in the command opcode
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add(0x02a0)/Update(0x02a1)/Remove(0x02a2) switch rules commands to firmware
+ */
+static enum ice_status
+ice_aq_sw_rules(struct ice_hw *hw, void *rule_list, u16 rule_list_sz,
+		u8 num_rules, enum ice_adminq_opc opc, struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	if (opc != ice_aqc_opc_add_sw_rules &&
+	    opc != ice_aqc_opc_update_sw_rules &&
+	    opc != ice_aqc_opc_remove_sw_rules)
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, opc);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	desc.params.sw_rules.num_rules_fltr_entry_index =
+		cpu_to_le16(num_rules);
+	status = ice_aq_send_cmd(hw, &desc, rule_list, rule_list_sz, cd);
+	if (opc != ice_aqc_opc_add_sw_rules &&
+	    hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)
+		status = ICE_ERR_DOES_NOT_EXIST;
+
+	return status;
+}
+
+/* ice_init_port_info - Initialize port_info with switch configuration data
+ * @pi: pointer to port_info
+ * @vsi_port_num: VSI number or port number
+ * @type: Type of switch element (port or VSI)
+ * @swid: switch ID of the switch the element is attached to
+ * @pf_vf_num: PF or VF number
+ * @is_vf: true if the element is a VF, false otherwise
+ */
+static void
+ice_init_port_info(struct ice_port_info *pi, u16 vsi_port_num, u8 type,
+		   u16 swid, u16 pf_vf_num, bool is_vf)
+{
+	switch (type) {
+	case ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT:
+		pi->lport = (u8)(vsi_port_num & ICE_LPORT_MASK);
+		pi->sw_id = swid;
+		pi->pf_vf_num = pf_vf_num;
+		pi->is_vf = is_vf;
+		pi->dflt_tx_vsi_num = ICE_DFLT_VSI_INVAL;
+		pi->dflt_rx_vsi_num = ICE_DFLT_VSI_INVAL;
+		break;
+	default:
+		ice_debug(pi->hw, ICE_DBG_SW,
+			  "incorrect VSI/port type received\n");
+		break;
+	}
+}
+
+/* ice_get_initial_sw_cfg - Get initial port and default VSI data
+ * @hw: pointer to the hardware structure
+ */
+enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw)
+{
+	struct ice_aqc_get_sw_cfg_resp_elem *rbuf;
+	enum ice_status status;
+	u16 req_desc = 0;
+	u16 num_elems;
+	u16 i;
+
+	rbuf = devm_kzalloc(ice_hw_to_dev(hw), ICE_SW_CFG_MAX_BUF_LEN,
+			    GFP_KERNEL);
+
+	if (!rbuf)
+		return ICE_ERR_NO_MEMORY;
+
+	/* Multiple calls to ice_aq_get_sw_cfg may be required
+	 * to get all the switch configuration information. The need
+	 * for additional calls is indicated by ice_aq_get_sw_cfg
+	 * writing a non-zero value in req_desc
+	 */
+	do {
+		struct ice_aqc_get_sw_cfg_resp_elem *ele;
+
+		status = ice_aq_get_sw_cfg(hw, rbuf, ICE_SW_CFG_MAX_BUF_LEN,
+					   &req_desc, &num_elems, NULL);
+
+		if (status)
+			break;
+
+		for (i = 0, ele = rbuf; i < num_elems; i++, ele++) {
+			u16 pf_vf_num, swid, vsi_port_num;
+			bool is_vf = false;
+			u8 res_type;
+
+			vsi_port_num = le16_to_cpu(ele->vsi_port_num) &
+				ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_M;
+
+			pf_vf_num = le16_to_cpu(ele->pf_vf_num) &
+				ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_M;
+
+			swid = le16_to_cpu(ele->swid);
+
+			if (le16_to_cpu(ele->pf_vf_num) &
+			    ICE_AQC_GET_SW_CONF_RESP_IS_VF)
+				is_vf = true;
+
+			res_type = (u8)(le16_to_cpu(ele->vsi_port_num) >>
+					ICE_AQC_GET_SW_CONF_RESP_TYPE_S);
+
+			if (res_type == ICE_AQC_GET_SW_CONF_RESP_VSI) {
+				/* FW VSI is not needed. Just continue. */
+				continue;
+			}
+
+			ice_init_port_info(hw->port_info, vsi_port_num,
+					   res_type, swid, pf_vf_num, is_vf);
+		}
+	} while (req_desc && !status);
+
+	devm_kfree(ice_hw_to_dev(hw), (void *)rbuf);
+	return status;
+}
+
+/**
+ * ice_fill_sw_info - Helper function to populate lb_en and lan_en
+ * @hw: pointer to the hardware structure
+ * @fi: filter info structure to fill/update
+ *
+ * This helper function populates the lb_en and lan_en elements of the provided
+ * ice_fltr_info struct using the switch's type and characteristics of the
+ * switch rule being configured.
+ */
+static void ice_fill_sw_info(struct ice_hw *hw, struct ice_fltr_info *fi)
+{
+	fi->lb_en = false;
+	fi->lan_en = false;
+	if ((fi->flag & ICE_FLTR_TX) &&
+	    (fi->fltr_act == ICE_FWD_TO_VSI ||
+	     fi->fltr_act == ICE_FWD_TO_VSI_LIST ||
+	     fi->fltr_act == ICE_FWD_TO_Q ||
+	     fi->fltr_act == ICE_FWD_TO_QGRP)) {
+		/* Setting LB for prune actions will result in replicated
+		 * packets to the internal switch that will be dropped.
+		 */
+		if (fi->lkup_type != ICE_SW_LKUP_VLAN)
+			fi->lb_en = true;
+
+		/* Set lan_en to TRUE if
+		 * 1. The switch is a VEB AND
+		 * 2
+		 * 2.1 The lookup is a directional lookup like ethertype,
+		 * promiscuous, ethertype-MAC, promiscuous-VLAN
+		 * and default-port OR
+		 * 2.2 The lookup is VLAN, OR
+		 * 2.3 The lookup is MAC with mcast or bcast addr for MAC, OR
+		 * 2.4 The lookup is MAC_VLAN with mcast or bcast addr for MAC.
+		 *
+		 * OR
+		 *
+		 * The switch is a VEPA.
+		 *
+		 * In all other cases, the LAN enable has to be set to false.
+		 */
+		if (hw->evb_veb) {
+			if (fi->lkup_type == ICE_SW_LKUP_ETHERTYPE ||
+			    fi->lkup_type == ICE_SW_LKUP_PROMISC ||
+			    fi->lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC ||
+			    fi->lkup_type == ICE_SW_LKUP_PROMISC_VLAN ||
+			    fi->lkup_type == ICE_SW_LKUP_DFLT ||
+			    fi->lkup_type == ICE_SW_LKUP_VLAN ||
+			    (fi->lkup_type == ICE_SW_LKUP_MAC &&
+			     !is_unicast_ether_addr(fi->l_data.mac.mac_addr)) ||
+			    (fi->lkup_type == ICE_SW_LKUP_MAC_VLAN &&
+			     !is_unicast_ether_addr(fi->l_data.mac.mac_addr)))
+				fi->lan_en = true;
+		} else {
+			fi->lan_en = true;
+		}
+	}
+}
+
+/**
+ * ice_fill_sw_rule - Helper function to fill switch rule structure
+ * @hw: pointer to the hardware structure
+ * @f_info: entry containing packet forwarding information
+ * @s_rule: switch rule structure to be filled in based on mac_entry
+ * @opc: switch rules population command type - pass in the command opcode
+ */
+static void
+ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info,
+		 struct ice_aqc_sw_rules_elem *s_rule, enum ice_adminq_opc opc)
+{
+	u16 vlan_id = ICE_MAX_VLAN_ID + 1;
+	void *daddr = NULL;
+	u16 eth_hdr_sz;
+	u8 *eth_hdr;
+	u32 act = 0;
+	__be16 *off;
+	u8 q_rgn;
+
+	if (opc == ice_aqc_opc_remove_sw_rules) {
+		s_rule->pdata.lkup_tx_rx.act = 0;
+		s_rule->pdata.lkup_tx_rx.index =
+			cpu_to_le16(f_info->fltr_rule_id);
+		s_rule->pdata.lkup_tx_rx.hdr_len = 0;
+		return;
+	}
+
+	eth_hdr_sz = sizeof(dummy_eth_header);
+	eth_hdr = s_rule->pdata.lkup_tx_rx.hdr;
+
+	/* initialize the ether header with a dummy header */
+	memcpy(eth_hdr, dummy_eth_header, eth_hdr_sz);
+	ice_fill_sw_info(hw, f_info);
+
+	switch (f_info->fltr_act) {
+	case ICE_FWD_TO_VSI:
+		act |= (f_info->fwd_id.hw_vsi_id << ICE_SINGLE_ACT_VSI_ID_S) &
+			ICE_SINGLE_ACT_VSI_ID_M;
+		if (f_info->lkup_type != ICE_SW_LKUP_VLAN)
+			act |= ICE_SINGLE_ACT_VSI_FORWARDING |
+				ICE_SINGLE_ACT_VALID_BIT;
+		break;
+	case ICE_FWD_TO_VSI_LIST:
+		act |= ICE_SINGLE_ACT_VSI_LIST;
+		act |= (f_info->fwd_id.vsi_list_id <<
+			ICE_SINGLE_ACT_VSI_LIST_ID_S) &
+			ICE_SINGLE_ACT_VSI_LIST_ID_M;
+		if (f_info->lkup_type != ICE_SW_LKUP_VLAN)
+			act |= ICE_SINGLE_ACT_VSI_FORWARDING |
+				ICE_SINGLE_ACT_VALID_BIT;
+		break;
+	case ICE_FWD_TO_Q:
+		act |= ICE_SINGLE_ACT_TO_Q;
+		act |= (f_info->fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) &
+			ICE_SINGLE_ACT_Q_INDEX_M;
+		break;
+	case ICE_DROP_PACKET:
+		act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_DROP |
+			ICE_SINGLE_ACT_VALID_BIT;
+		break;
+	case ICE_FWD_TO_QGRP:
+		q_rgn = f_info->qgrp_size > 0 ?
+			(u8)ilog2(f_info->qgrp_size) : 0;
+		act |= ICE_SINGLE_ACT_TO_Q;
+		act |= (f_info->fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) &
+			ICE_SINGLE_ACT_Q_INDEX_M;
+		act |= (q_rgn << ICE_SINGLE_ACT_Q_REGION_S) &
+			ICE_SINGLE_ACT_Q_REGION_M;
+		break;
+	default:
+		return;
+	}
+
+	if (f_info->lb_en)
+		act |= ICE_SINGLE_ACT_LB_ENABLE;
+	if (f_info->lan_en)
+		act |= ICE_SINGLE_ACT_LAN_ENABLE;
+
+	switch (f_info->lkup_type) {
+	case ICE_SW_LKUP_MAC:
+		daddr = f_info->l_data.mac.mac_addr;
+		break;
+	case ICE_SW_LKUP_VLAN:
+		vlan_id = f_info->l_data.vlan.vlan_id;
+		if (f_info->fltr_act == ICE_FWD_TO_VSI ||
+		    f_info->fltr_act == ICE_FWD_TO_VSI_LIST) {
+			act |= ICE_SINGLE_ACT_PRUNE;
+			act |= ICE_SINGLE_ACT_EGRESS | ICE_SINGLE_ACT_INGRESS;
+		}
+		break;
+	case ICE_SW_LKUP_ETHERTYPE_MAC:
+		daddr = f_info->l_data.ethertype_mac.mac_addr;
+		fallthrough;
+	case ICE_SW_LKUP_ETHERTYPE:
+		off = (__force __be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET);
+		*off = cpu_to_be16(f_info->l_data.ethertype_mac.ethertype);
+		break;
+	case ICE_SW_LKUP_MAC_VLAN:
+		daddr = f_info->l_data.mac_vlan.mac_addr;
+		vlan_id = f_info->l_data.mac_vlan.vlan_id;
+		break;
+	case ICE_SW_LKUP_PROMISC_VLAN:
+		vlan_id = f_info->l_data.mac_vlan.vlan_id;
+		fallthrough;
+	case ICE_SW_LKUP_PROMISC:
+		daddr = f_info->l_data.mac_vlan.mac_addr;
+		break;
+	default:
+		break;
+	}
+
+	s_rule->type = (f_info->flag & ICE_FLTR_RX) ?
+		cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX) :
+		cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX);
+
+	/* Recipe set depending on lookup type */
+	s_rule->pdata.lkup_tx_rx.recipe_id = cpu_to_le16(f_info->lkup_type);
+	s_rule->pdata.lkup_tx_rx.src = cpu_to_le16(f_info->src);
+	s_rule->pdata.lkup_tx_rx.act = cpu_to_le32(act);
+
+	if (daddr)
+		ether_addr_copy(eth_hdr + ICE_ETH_DA_OFFSET, daddr);
+
+	if (!(vlan_id > ICE_MAX_VLAN_ID)) {
+		off = (__force __be16 *)(eth_hdr + ICE_ETH_VLAN_TCI_OFFSET);
+		*off = cpu_to_be16(vlan_id);
+	}
+
+	/* Create the switch rule with the final dummy Ethernet header */
+	if (opc != ice_aqc_opc_update_sw_rules)
+		s_rule->pdata.lkup_tx_rx.hdr_len = cpu_to_le16(eth_hdr_sz);
+}
+
+/**
+ * ice_add_marker_act
+ * @hw: pointer to the hardware structure
+ * @m_ent: the management entry for which sw marker needs to be added
+ * @sw_marker: sw marker to tag the Rx descriptor with
+ * @l_id: large action resource ID
+ *
+ * Create a large action to hold software marker and update the switch rule
+ * entry pointed by m_ent with newly created large action
+ */
+static enum ice_status
+ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent,
+		   u16 sw_marker, u16 l_id)
+{
+	struct ice_aqc_sw_rules_elem *lg_act, *rx_tx;
+	/* For software marker we need 3 large actions
+	 * 1. FWD action: FWD TO VSI or VSI LIST
+	 * 2. GENERIC VALUE action to hold the profile ID
+	 * 3. GENERIC VALUE action to hold the software marker ID
+	 */
+	const u16 num_lg_acts = 3;
+	enum ice_status status;
+	u16 lg_act_size;
+	u16 rules_size;
+	u32 act;
+	u16 id;
+
+	if (m_ent->fltr_info.lkup_type != ICE_SW_LKUP_MAC)
+		return ICE_ERR_PARAM;
+
+	/* Create two back-to-back switch rules and submit them to the HW using
+	 * one memory buffer:
+	 *    1. Large Action
+	 *    2. Look up Tx Rx
+	 */
+	lg_act_size = (u16)ICE_SW_RULE_LG_ACT_SIZE(num_lg_acts);
+	rules_size = lg_act_size + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE;
+	lg_act = devm_kzalloc(ice_hw_to_dev(hw), rules_size, GFP_KERNEL);
+	if (!lg_act)
+		return ICE_ERR_NO_MEMORY;
+
+	rx_tx = (struct ice_aqc_sw_rules_elem *)((u8 *)lg_act + lg_act_size);
+
+	/* Fill in the first switch rule i.e. large action */
+	lg_act->type = cpu_to_le16(ICE_AQC_SW_RULES_T_LG_ACT);
+	lg_act->pdata.lg_act.index = cpu_to_le16(l_id);
+	lg_act->pdata.lg_act.size = cpu_to_le16(num_lg_acts);
+
+	/* First action VSI forwarding or VSI list forwarding depending on how
+	 * many VSIs
+	 */
+	id = (m_ent->vsi_count > 1) ? m_ent->fltr_info.fwd_id.vsi_list_id :
+		m_ent->fltr_info.fwd_id.hw_vsi_id;
+
+	act = ICE_LG_ACT_VSI_FORWARDING | ICE_LG_ACT_VALID_BIT;
+	act |= (id << ICE_LG_ACT_VSI_LIST_ID_S) & ICE_LG_ACT_VSI_LIST_ID_M;
+	if (m_ent->vsi_count > 1)
+		act |= ICE_LG_ACT_VSI_LIST;
+	lg_act->pdata.lg_act.act[0] = cpu_to_le32(act);
+
+	/* Second action descriptor type */
+	act = ICE_LG_ACT_GENERIC;
+
+	act |= (1 << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M;
+	lg_act->pdata.lg_act.act[1] = cpu_to_le32(act);
+
+	act = (ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX <<
+	       ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_OFFSET_M;
+
+	/* Third action Marker value */
+	act |= ICE_LG_ACT_GENERIC;
+	act |= (sw_marker << ICE_LG_ACT_GENERIC_VALUE_S) &
+		ICE_LG_ACT_GENERIC_VALUE_M;
+
+	lg_act->pdata.lg_act.act[2] = cpu_to_le32(act);
+
+	/* call the fill switch rule to fill the lookup Tx Rx structure */
+	ice_fill_sw_rule(hw, &m_ent->fltr_info, rx_tx,
+			 ice_aqc_opc_update_sw_rules);
+
+	/* Update the action to point to the large action ID */
+	rx_tx->pdata.lkup_tx_rx.act =
+		cpu_to_le32(ICE_SINGLE_ACT_PTR |
+			    ((l_id << ICE_SINGLE_ACT_PTR_VAL_S) &
+			     ICE_SINGLE_ACT_PTR_VAL_M));
+
+	/* Use the filter rule ID of the previously created rule with single
+	 * act. Once the update happens, hardware will treat this as large
+	 * action
+	 */
+	rx_tx->pdata.lkup_tx_rx.index =
+		cpu_to_le16(m_ent->fltr_info.fltr_rule_id);
+
+	status = ice_aq_sw_rules(hw, lg_act, rules_size, 2,
+				 ice_aqc_opc_update_sw_rules, NULL);
+	if (!status) {
+		m_ent->lg_act_idx = l_id;
+		m_ent->sw_marker_id = sw_marker;
+	}
+
+	devm_kfree(ice_hw_to_dev(hw), lg_act);
+	return status;
+}
+
+/**
+ * ice_create_vsi_list_map
+ * @hw: pointer to the hardware structure
+ * @vsi_handle_arr: array of VSI handles to set in the VSI mapping
+ * @num_vsi: number of VSI handles in the array
+ * @vsi_list_id: VSI list ID generated as part of allocate resource
+ *
+ * Helper function to create a new entry of VSI list ID to VSI mapping
+ * using the given VSI list ID
+ */
+static struct ice_vsi_list_map_info *
+ice_create_vsi_list_map(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi,
+			u16 vsi_list_id)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_vsi_list_map_info *v_map;
+	int i;
+
+	v_map = devm_kcalloc(ice_hw_to_dev(hw), 1, sizeof(*v_map), GFP_KERNEL);
+	if (!v_map)
+		return NULL;
+
+	v_map->vsi_list_id = vsi_list_id;
+	v_map->ref_cnt = 1;
+	for (i = 0; i < num_vsi; i++)
+		set_bit(vsi_handle_arr[i], v_map->vsi_map);
+
+	list_add(&v_map->list_entry, &sw->vsi_list_map_head);
+	return v_map;
+}
+
+/**
+ * ice_update_vsi_list_rule
+ * @hw: pointer to the hardware structure
+ * @vsi_handle_arr: array of VSI handles to form a VSI list
+ * @num_vsi: number of VSI handles in the array
+ * @vsi_list_id: VSI list ID generated as part of allocate resource
+ * @remove: Boolean value to indicate if this is a remove action
+ * @opc: switch rules population command type - pass in the command opcode
+ * @lkup_type: lookup type of the filter
+ *
+ * Call AQ command to add a new switch rule or update existing switch rule
+ * using the given VSI list ID
+ */
+static enum ice_status
+ice_update_vsi_list_rule(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi,
+			 u16 vsi_list_id, bool remove, enum ice_adminq_opc opc,
+			 enum ice_sw_lkup_type lkup_type)
+{
+	struct ice_aqc_sw_rules_elem *s_rule;
+	enum ice_status status;
+	u16 s_rule_size;
+	u16 rule_type;
+	int i;
+
+	if (!num_vsi)
+		return ICE_ERR_PARAM;
+
+	if (lkup_type == ICE_SW_LKUP_MAC ||
+	    lkup_type == ICE_SW_LKUP_MAC_VLAN ||
+	    lkup_type == ICE_SW_LKUP_ETHERTYPE ||
+	    lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC ||
+	    lkup_type == ICE_SW_LKUP_PROMISC ||
+	    lkup_type == ICE_SW_LKUP_PROMISC_VLAN)
+		rule_type = remove ? ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR :
+			ICE_AQC_SW_RULES_T_VSI_LIST_SET;
+	else if (lkup_type == ICE_SW_LKUP_VLAN)
+		rule_type = remove ? ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR :
+			ICE_AQC_SW_RULES_T_PRUNE_LIST_SET;
+	else
+		return ICE_ERR_PARAM;
+
+	s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(num_vsi);
+	s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL);
+	if (!s_rule)
+		return ICE_ERR_NO_MEMORY;
+	for (i = 0; i < num_vsi; i++) {
+		if (!ice_is_vsi_valid(hw, vsi_handle_arr[i])) {
+			status = ICE_ERR_PARAM;
+			goto exit;
+		}
+		/* AQ call requires hw_vsi_id(s) */
+		s_rule->pdata.vsi_list.vsi[i] =
+			cpu_to_le16(ice_get_hw_vsi_num(hw, vsi_handle_arr[i]));
+	}
+
+	s_rule->type = cpu_to_le16(rule_type);
+	s_rule->pdata.vsi_list.number_vsi = cpu_to_le16(num_vsi);
+	s_rule->pdata.vsi_list.index = cpu_to_le16(vsi_list_id);
+
+	status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1, opc, NULL);
+
+exit:
+	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
+/**
+ * ice_create_vsi_list_rule - Creates and populates a VSI list rule
+ * @hw: pointer to the HW struct
+ * @vsi_handle_arr: array of VSI handles to form a VSI list
+ * @num_vsi: number of VSI handles in the array
+ * @vsi_list_id: stores the ID of the VSI list to be created
+ * @lkup_type: switch rule filter's lookup type
+ */
+static enum ice_status
+ice_create_vsi_list_rule(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi,
+			 u16 *vsi_list_id, enum ice_sw_lkup_type lkup_type)
+{
+	enum ice_status status;
+
+	status = ice_aq_alloc_free_vsi_list(hw, vsi_list_id, lkup_type,
+					    ice_aqc_opc_alloc_res);
+	if (status)
+		return status;
+
+	/* Update the newly created VSI list to include the specified VSIs */
+	return ice_update_vsi_list_rule(hw, vsi_handle_arr, num_vsi,
+					*vsi_list_id, false,
+					ice_aqc_opc_add_sw_rules, lkup_type);
+}
+
+/**
+ * ice_create_pkt_fwd_rule
+ * @hw: pointer to the hardware structure
+ * @f_entry: entry containing packet forwarding information
+ *
+ * Create switch rule with given filter information and add an entry
+ * to the corresponding filter management list to track this switch rule
+ * and VSI mapping
+ */
+static enum ice_status
+ice_create_pkt_fwd_rule(struct ice_hw *hw,
+			struct ice_fltr_list_entry *f_entry)
+{
+	struct ice_fltr_mgmt_list_entry *fm_entry;
+	struct ice_aqc_sw_rules_elem *s_rule;
+	enum ice_sw_lkup_type l_type;
+	struct ice_sw_recipe *recp;
+	enum ice_status status;
+
+	s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+			      ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, GFP_KERNEL);
+	if (!s_rule)
+		return ICE_ERR_NO_MEMORY;
+	fm_entry = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*fm_entry),
+				GFP_KERNEL);
+	if (!fm_entry) {
+		status = ICE_ERR_NO_MEMORY;
+		goto ice_create_pkt_fwd_rule_exit;
+	}
+
+	fm_entry->fltr_info = f_entry->fltr_info;
+
+	/* Initialize all the fields for the management entry */
+	fm_entry->vsi_count = 1;
+	fm_entry->lg_act_idx = ICE_INVAL_LG_ACT_INDEX;
+	fm_entry->sw_marker_id = ICE_INVAL_SW_MARKER_ID;
+	fm_entry->counter_index = ICE_INVAL_COUNTER_ID;
+
+	ice_fill_sw_rule(hw, &fm_entry->fltr_info, s_rule,
+			 ice_aqc_opc_add_sw_rules);
+
+	status = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1,
+				 ice_aqc_opc_add_sw_rules, NULL);
+	if (status) {
+		devm_kfree(ice_hw_to_dev(hw), fm_entry);
+		goto ice_create_pkt_fwd_rule_exit;
+	}
+
+	f_entry->fltr_info.fltr_rule_id =
+		le16_to_cpu(s_rule->pdata.lkup_tx_rx.index);
+	fm_entry->fltr_info.fltr_rule_id =
+		le16_to_cpu(s_rule->pdata.lkup_tx_rx.index);
+
+	/* The book keeping entries will get removed when base driver
+	 * calls remove filter AQ command
+	 */
+	l_type = fm_entry->fltr_info.lkup_type;
+	recp = &hw->switch_info->recp_list[l_type];
+	list_add(&fm_entry->list_entry, &recp->filt_rules);
+
+ice_create_pkt_fwd_rule_exit:
+	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
+/**
+ * ice_update_pkt_fwd_rule
+ * @hw: pointer to the hardware structure
+ * @f_info: filter information for switch rule
+ *
+ * Call AQ command to update a previously created switch rule with a
+ * VSI list ID
+ */
+static enum ice_status
+ice_update_pkt_fwd_rule(struct ice_hw *hw, struct ice_fltr_info *f_info)
+{
+	struct ice_aqc_sw_rules_elem *s_rule;
+	enum ice_status status;
+
+	s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+			      ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, GFP_KERNEL);
+	if (!s_rule)
+		return ICE_ERR_NO_MEMORY;
+
+	ice_fill_sw_rule(hw, f_info, s_rule, ice_aqc_opc_update_sw_rules);
+
+	s_rule->pdata.lkup_tx_rx.index = cpu_to_le16(f_info->fltr_rule_id);
+
+	/* Update switch rule with new rule set to forward VSI list */
+	status = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1,
+				 ice_aqc_opc_update_sw_rules, NULL);
+
+	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
+/**
+ * ice_update_sw_rule_bridge_mode
+ * @hw: pointer to the HW struct
+ *
+ * Updates unicast switch filter rules based on VEB/VEPA mode
+ */
+enum ice_status ice_update_sw_rule_bridge_mode(struct ice_hw *hw)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_mgmt_list_entry *fm_entry;
+	enum ice_status status = 0;
+	struct list_head *rule_head;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+
+	rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
+	rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
+
+	mutex_lock(rule_lock);
+	list_for_each_entry(fm_entry, rule_head, list_entry) {
+		struct ice_fltr_info *fi = &fm_entry->fltr_info;
+		u8 *addr = fi->l_data.mac.mac_addr;
+
+		/* Update unicast Tx rules to reflect the selected
+		 * VEB/VEPA mode
+		 */
+		if ((fi->flag & ICE_FLTR_TX) && is_unicast_ether_addr(addr) &&
+		    (fi->fltr_act == ICE_FWD_TO_VSI ||
+		     fi->fltr_act == ICE_FWD_TO_VSI_LIST ||
+		     fi->fltr_act == ICE_FWD_TO_Q ||
+		     fi->fltr_act == ICE_FWD_TO_QGRP)) {
+			status = ice_update_pkt_fwd_rule(hw, fi);
+			if (status)
+				break;
+		}
+	}
+
+	mutex_unlock(rule_lock);
+
+	return status;
+}
+
+/**
+ * ice_add_update_vsi_list
+ * @hw: pointer to the hardware structure
+ * @m_entry: pointer to current filter management list entry
+ * @cur_fltr: filter information from the book keeping entry
+ * @new_fltr: filter information with the new VSI to be added
+ *
+ * Call AQ command to add or update previously created VSI list with new VSI.
+ *
+ * Helper function to do book keeping associated with adding filter information
+ * The algorithm to do the book keeping is described below :
+ * When a VSI needs to subscribe to a given filter (MAC/VLAN/Ethtype etc.)
+ *	if only one VSI has been added till now
+ *		Allocate a new VSI list and add two VSIs
+ *		to this list using switch rule command
+ *		Update the previously created switch rule with the
+ *		newly created VSI list ID
+ *	if a VSI list was previously created
+ *		Add the new VSI to the previously created VSI list set
+ *		using the update switch rule command
+ */
+static enum ice_status
+ice_add_update_vsi_list(struct ice_hw *hw,
+			struct ice_fltr_mgmt_list_entry *m_entry,
+			struct ice_fltr_info *cur_fltr,
+			struct ice_fltr_info *new_fltr)
+{
+	enum ice_status status = 0;
+	u16 vsi_list_id = 0;
+
+	if ((cur_fltr->fltr_act == ICE_FWD_TO_Q ||
+	     cur_fltr->fltr_act == ICE_FWD_TO_QGRP))
+		return ICE_ERR_NOT_IMPL;
+
+	if ((new_fltr->fltr_act == ICE_FWD_TO_Q ||
+	     new_fltr->fltr_act == ICE_FWD_TO_QGRP) &&
+	    (cur_fltr->fltr_act == ICE_FWD_TO_VSI ||
+	     cur_fltr->fltr_act == ICE_FWD_TO_VSI_LIST))
+		return ICE_ERR_NOT_IMPL;
+
+	if (m_entry->vsi_count < 2 && !m_entry->vsi_list_info) {
+		/* Only one entry existed in the mapping and it was not already
+		 * a part of a VSI list. So, create a VSI list with the old and
+		 * new VSIs.
+		 */
+		struct ice_fltr_info tmp_fltr;
+		u16 vsi_handle_arr[2];
+
+		/* A rule already exists with the new VSI being added */
+		if (cur_fltr->fwd_id.hw_vsi_id == new_fltr->fwd_id.hw_vsi_id)
+			return ICE_ERR_ALREADY_EXISTS;
+
+		vsi_handle_arr[0] = cur_fltr->vsi_handle;
+		vsi_handle_arr[1] = new_fltr->vsi_handle;
+		status = ice_create_vsi_list_rule(hw, &vsi_handle_arr[0], 2,
+						  &vsi_list_id,
+						  new_fltr->lkup_type);
+		if (status)
+			return status;
+
+		tmp_fltr = *new_fltr;
+		tmp_fltr.fltr_rule_id = cur_fltr->fltr_rule_id;
+		tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST;
+		tmp_fltr.fwd_id.vsi_list_id = vsi_list_id;
+		/* Update the previous switch rule of "MAC forward to VSI" to
+		 * "MAC fwd to VSI list"
+		 */
+		status = ice_update_pkt_fwd_rule(hw, &tmp_fltr);
+		if (status)
+			return status;
+
+		cur_fltr->fwd_id.vsi_list_id = vsi_list_id;
+		cur_fltr->fltr_act = ICE_FWD_TO_VSI_LIST;
+		m_entry->vsi_list_info =
+			ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2,
+						vsi_list_id);
+
+		if (!m_entry->vsi_list_info)
+			return ICE_ERR_NO_MEMORY;
+
+		/* If this entry was large action then the large action needs
+		 * to be updated to point to FWD to VSI list
+		 */
+		if (m_entry->sw_marker_id != ICE_INVAL_SW_MARKER_ID)
+			status =
+			    ice_add_marker_act(hw, m_entry,
+					       m_entry->sw_marker_id,
+					       m_entry->lg_act_idx);
+	} else {
+		u16 vsi_handle = new_fltr->vsi_handle;
+		enum ice_adminq_opc opcode;
+
+		if (!m_entry->vsi_list_info)
+			return ICE_ERR_CFG;
+
+		/* A rule already exists with the new VSI being added */
+		if (test_bit(vsi_handle, m_entry->vsi_list_info->vsi_map))
+			return 0;
+
+		/* Update the previously created VSI list set with
+		 * the new VSI ID passed in
+		 */
+		vsi_list_id = cur_fltr->fwd_id.vsi_list_id;
+		opcode = ice_aqc_opc_update_sw_rules;
+
+		status = ice_update_vsi_list_rule(hw, &vsi_handle, 1,
+						  vsi_list_id, false, opcode,
+						  new_fltr->lkup_type);
+		/* update VSI list mapping info with new VSI ID */
+		if (!status)
+			set_bit(vsi_handle, m_entry->vsi_list_info->vsi_map);
+	}
+	if (!status)
+		m_entry->vsi_count++;
+	return status;
+}
+
+/**
+ * ice_find_rule_entry - Search a rule entry
+ * @hw: pointer to the hardware structure
+ * @recp_id: lookup type for which the specified rule needs to be searched
+ * @f_info: rule information
+ *
+ * Helper function to search for a given rule entry
+ * Returns pointer to entry storing the rule if found
+ */
+static struct ice_fltr_mgmt_list_entry *
+ice_find_rule_entry(struct ice_hw *hw, u8 recp_id, struct ice_fltr_info *f_info)
+{
+	struct ice_fltr_mgmt_list_entry *list_itr, *ret = NULL;
+	struct ice_switch_info *sw = hw->switch_info;
+	struct list_head *list_head;
+
+	list_head = &sw->recp_list[recp_id].filt_rules;
+	list_for_each_entry(list_itr, list_head, list_entry) {
+		if (!memcmp(&f_info->l_data, &list_itr->fltr_info.l_data,
+			    sizeof(f_info->l_data)) &&
+		    f_info->flag == list_itr->fltr_info.flag) {
+			ret = list_itr;
+			break;
+		}
+	}
+	return ret;
+}
+
+/**
+ * ice_find_vsi_list_entry - Search VSI list map with VSI count 1
+ * @hw: pointer to the hardware structure
+ * @recp_id: lookup type for which VSI lists needs to be searched
+ * @vsi_handle: VSI handle to be found in VSI list
+ * @vsi_list_id: VSI list ID found containing vsi_handle
+ *
+ * Helper function to search a VSI list with single entry containing given VSI
+ * handle element. This can be extended further to search VSI list with more
+ * than 1 vsi_count. Returns pointer to VSI list entry if found.
+ */
+static struct ice_vsi_list_map_info *
+ice_find_vsi_list_entry(struct ice_hw *hw, u8 recp_id, u16 vsi_handle,
+			u16 *vsi_list_id)
+{
+	struct ice_vsi_list_map_info *map_info = NULL;
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_mgmt_list_entry *list_itr;
+	struct list_head *list_head;
+
+	list_head = &sw->recp_list[recp_id].filt_rules;
+	list_for_each_entry(list_itr, list_head, list_entry) {
+		if (list_itr->vsi_count == 1 && list_itr->vsi_list_info) {
+			map_info = list_itr->vsi_list_info;
+			if (test_bit(vsi_handle, map_info->vsi_map)) {
+				*vsi_list_id = map_info->vsi_list_id;
+				return map_info;
+			}
+		}
+	}
+	return NULL;
+}
+
+/**
+ * ice_add_rule_internal - add rule for a given lookup type
+ * @hw: pointer to the hardware structure
+ * @recp_id: lookup type (recipe ID) for which rule has to be added
+ * @f_entry: structure containing MAC forwarding information
+ *
+ * Adds or updates the rule lists for a given recipe
+ */
+static enum ice_status
+ice_add_rule_internal(struct ice_hw *hw, u8 recp_id,
+		      struct ice_fltr_list_entry *f_entry)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_info *new_fltr, *cur_fltr;
+	struct ice_fltr_mgmt_list_entry *m_entry;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+	enum ice_status status = 0;
+
+	if (!ice_is_vsi_valid(hw, f_entry->fltr_info.vsi_handle))
+		return ICE_ERR_PARAM;
+	f_entry->fltr_info.fwd_id.hw_vsi_id =
+		ice_get_hw_vsi_num(hw, f_entry->fltr_info.vsi_handle);
+
+	rule_lock = &sw->recp_list[recp_id].filt_rule_lock;
+
+	mutex_lock(rule_lock);
+	new_fltr = &f_entry->fltr_info;
+	if (new_fltr->flag & ICE_FLTR_RX)
+		new_fltr->src = hw->port_info->lport;
+	else if (new_fltr->flag & ICE_FLTR_TX)
+		new_fltr->src = f_entry->fltr_info.fwd_id.hw_vsi_id;
+
+	m_entry = ice_find_rule_entry(hw, recp_id, new_fltr);
+	if (!m_entry) {
+		mutex_unlock(rule_lock);
+		return ice_create_pkt_fwd_rule(hw, f_entry);
+	}
+
+	cur_fltr = &m_entry->fltr_info;
+	status = ice_add_update_vsi_list(hw, m_entry, cur_fltr, new_fltr);
+	mutex_unlock(rule_lock);
+
+	return status;
+}
+
+/**
+ * ice_remove_vsi_list_rule
+ * @hw: pointer to the hardware structure
+ * @vsi_list_id: VSI list ID generated as part of allocate resource
+ * @lkup_type: switch rule filter lookup type
+ *
+ * The VSI list should be emptied before this function is called to remove the
+ * VSI list.
+ */
+static enum ice_status
+ice_remove_vsi_list_rule(struct ice_hw *hw, u16 vsi_list_id,
+			 enum ice_sw_lkup_type lkup_type)
+{
+	struct ice_aqc_sw_rules_elem *s_rule;
+	enum ice_status status;
+	u16 s_rule_size;
+
+	s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(0);
+	s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL);
+	if (!s_rule)
+		return ICE_ERR_NO_MEMORY;
+
+	s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR);
+	s_rule->pdata.vsi_list.index = cpu_to_le16(vsi_list_id);
+
+	/* Free the vsi_list resource that we allocated. It is assumed that the
+	 * list is empty at this point.
+	 */
+	status = ice_aq_alloc_free_vsi_list(hw, &vsi_list_id, lkup_type,
+					    ice_aqc_opc_free_res);
+
+	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
+/**
+ * ice_rem_update_vsi_list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle of the VSI to remove
+ * @fm_list: filter management entry for which the VSI list management needs to
+ *           be done
+ */
+static enum ice_status
+ice_rem_update_vsi_list(struct ice_hw *hw, u16 vsi_handle,
+			struct ice_fltr_mgmt_list_entry *fm_list)
+{
+	enum ice_sw_lkup_type lkup_type;
+	enum ice_status status = 0;
+	u16 vsi_list_id;
+
+	if (fm_list->fltr_info.fltr_act != ICE_FWD_TO_VSI_LIST ||
+	    fm_list->vsi_count == 0)
+		return ICE_ERR_PARAM;
+
+	/* A rule with the VSI being removed does not exist */
+	if (!test_bit(vsi_handle, fm_list->vsi_list_info->vsi_map))
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	lkup_type = fm_list->fltr_info.lkup_type;
+	vsi_list_id = fm_list->fltr_info.fwd_id.vsi_list_id;
+	status = ice_update_vsi_list_rule(hw, &vsi_handle, 1, vsi_list_id, true,
+					  ice_aqc_opc_update_sw_rules,
+					  lkup_type);
+	if (status)
+		return status;
+
+	fm_list->vsi_count--;
+	clear_bit(vsi_handle, fm_list->vsi_list_info->vsi_map);
+
+	if (fm_list->vsi_count == 1 && lkup_type != ICE_SW_LKUP_VLAN) {
+		struct ice_fltr_info tmp_fltr_info = fm_list->fltr_info;
+		struct ice_vsi_list_map_info *vsi_list_info =
+			fm_list->vsi_list_info;
+		u16 rem_vsi_handle;
+
+		rem_vsi_handle = find_first_bit(vsi_list_info->vsi_map,
+						ICE_MAX_VSI);
+		if (!ice_is_vsi_valid(hw, rem_vsi_handle))
+			return ICE_ERR_OUT_OF_RANGE;
+
+		/* Make sure VSI list is empty before removing it below */
+		status = ice_update_vsi_list_rule(hw, &rem_vsi_handle, 1,
+						  vsi_list_id, true,
+						  ice_aqc_opc_update_sw_rules,
+						  lkup_type);
+		if (status)
+			return status;
+
+		tmp_fltr_info.fltr_act = ICE_FWD_TO_VSI;
+		tmp_fltr_info.fwd_id.hw_vsi_id =
+			ice_get_hw_vsi_num(hw, rem_vsi_handle);
+		tmp_fltr_info.vsi_handle = rem_vsi_handle;
+		status = ice_update_pkt_fwd_rule(hw, &tmp_fltr_info);
+		if (status) {
+			ice_debug(hw, ICE_DBG_SW,
+				  "Failed to update pkt fwd rule to FWD_TO_VSI on HW VSI %d, error %d\n",
+				  tmp_fltr_info.fwd_id.hw_vsi_id, status);
+			return status;
+		}
+
+		fm_list->fltr_info = tmp_fltr_info;
+	}
+
+	if ((fm_list->vsi_count == 1 && lkup_type != ICE_SW_LKUP_VLAN) ||
+	    (fm_list->vsi_count == 0 && lkup_type == ICE_SW_LKUP_VLAN)) {
+		struct ice_vsi_list_map_info *vsi_list_info =
+			fm_list->vsi_list_info;
+
+		/* Remove the VSI list since it is no longer used */
+		status = ice_remove_vsi_list_rule(hw, vsi_list_id, lkup_type);
+		if (status) {
+			ice_debug(hw, ICE_DBG_SW,
+				  "Failed to remove VSI list %d, error %d\n",
+				  vsi_list_id, status);
+			return status;
+		}
+
+		list_del(&vsi_list_info->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), vsi_list_info);
+		fm_list->vsi_list_info = NULL;
+	}
+
+	return status;
+}
+
+/**
+ * ice_remove_rule_internal - Remove a filter rule of a given type
+ * @hw: pointer to the hardware structure
+ * @recp_id: recipe ID for which the rule needs to removed
+ * @f_entry: rule entry containing filter information
+ */
+static enum ice_status
+ice_remove_rule_internal(struct ice_hw *hw, u8 recp_id,
+			 struct ice_fltr_list_entry *f_entry)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_mgmt_list_entry *list_elem;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+	enum ice_status status = 0;
+	bool remove_rule = false;
+	u16 vsi_handle;
+
+	if (!ice_is_vsi_valid(hw, f_entry->fltr_info.vsi_handle))
+		return ICE_ERR_PARAM;
+	f_entry->fltr_info.fwd_id.hw_vsi_id =
+		ice_get_hw_vsi_num(hw, f_entry->fltr_info.vsi_handle);
+
+	rule_lock = &sw->recp_list[recp_id].filt_rule_lock;
+	mutex_lock(rule_lock);
+	list_elem = ice_find_rule_entry(hw, recp_id, &f_entry->fltr_info);
+	if (!list_elem) {
+		status = ICE_ERR_DOES_NOT_EXIST;
+		goto exit;
+	}
+
+	if (list_elem->fltr_info.fltr_act != ICE_FWD_TO_VSI_LIST) {
+		remove_rule = true;
+	} else if (!list_elem->vsi_list_info) {
+		status = ICE_ERR_DOES_NOT_EXIST;
+		goto exit;
+	} else if (list_elem->vsi_list_info->ref_cnt > 1) {
+		/* a ref_cnt > 1 indicates that the vsi_list is being
+		 * shared by multiple rules. Decrement the ref_cnt and
+		 * remove this rule, but do not modify the list, as it
+		 * is in-use by other rules.
+		 */
+		list_elem->vsi_list_info->ref_cnt--;
+		remove_rule = true;
+	} else {
+		/* a ref_cnt of 1 indicates the vsi_list is only used
+		 * by one rule. However, the original removal request is only
+		 * for a single VSI. Update the vsi_list first, and only
+		 * remove the rule if there are no further VSIs in this list.
+		 */
+		vsi_handle = f_entry->fltr_info.vsi_handle;
+		status = ice_rem_update_vsi_list(hw, vsi_handle, list_elem);
+		if (status)
+			goto exit;
+		/* if VSI count goes to zero after updating the VSI list */
+		if (list_elem->vsi_count == 0)
+			remove_rule = true;
+	}
+
+	if (remove_rule) {
+		/* Remove the lookup rule */
+		struct ice_aqc_sw_rules_elem *s_rule;
+
+		s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+				      ICE_SW_RULE_RX_TX_NO_HDR_SIZE,
+				      GFP_KERNEL);
+		if (!s_rule) {
+			status = ICE_ERR_NO_MEMORY;
+			goto exit;
+		}
+
+		ice_fill_sw_rule(hw, &list_elem->fltr_info, s_rule,
+				 ice_aqc_opc_remove_sw_rules);
+
+		status = ice_aq_sw_rules(hw, s_rule,
+					 ICE_SW_RULE_RX_TX_NO_HDR_SIZE, 1,
+					 ice_aqc_opc_remove_sw_rules, NULL);
+
+		/* Remove a book keeping from the list */
+		devm_kfree(ice_hw_to_dev(hw), s_rule);
+
+		if (status)
+			goto exit;
+
+		list_del(&list_elem->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), list_elem);
+	}
+exit:
+	mutex_unlock(rule_lock);
+	return status;
+}
+
+/**
+ * ice_add_mac - Add a MAC address based filter rule
+ * @hw: pointer to the hardware structure
+ * @m_list: list of MAC addresses and forwarding information
+ *
+ * IMPORTANT: When the ucast_shared flag is set to false and m_list has
+ * multiple unicast addresses, the function assumes that all the
+ * addresses are unique in a given add_mac call. It doesn't
+ * check for duplicates in this case, removing duplicates from a given
+ * list should be taken care of in the caller of this function.
+ */
+enum ice_status ice_add_mac(struct ice_hw *hw, struct list_head *m_list)
+{
+	struct ice_aqc_sw_rules_elem *s_rule, *r_iter;
+	struct ice_fltr_list_entry *m_list_itr;
+	struct list_head *rule_head;
+	u16 total_elem_left, s_rule_size;
+	struct ice_switch_info *sw;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+	enum ice_status status = 0;
+	u16 num_unicast = 0;
+	u8 elem_sent;
+
+	if (!m_list || !hw)
+		return ICE_ERR_PARAM;
+
+	s_rule = NULL;
+	sw = hw->switch_info;
+	rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
+	list_for_each_entry(m_list_itr, m_list, list_entry) {
+		u8 *add = &m_list_itr->fltr_info.l_data.mac.mac_addr[0];
+		u16 vsi_handle;
+		u16 hw_vsi_id;
+
+		m_list_itr->fltr_info.flag = ICE_FLTR_TX;
+		vsi_handle = m_list_itr->fltr_info.vsi_handle;
+		if (!ice_is_vsi_valid(hw, vsi_handle))
+			return ICE_ERR_PARAM;
+		hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+		m_list_itr->fltr_info.fwd_id.hw_vsi_id = hw_vsi_id;
+		/* update the src in case it is VSI num */
+		if (m_list_itr->fltr_info.src_id != ICE_SRC_ID_VSI)
+			return ICE_ERR_PARAM;
+		m_list_itr->fltr_info.src = hw_vsi_id;
+		if (m_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_MAC ||
+		    is_zero_ether_addr(add))
+			return ICE_ERR_PARAM;
+		if (is_unicast_ether_addr(add) && !hw->ucast_shared) {
+			/* Don't overwrite the unicast address */
+			mutex_lock(rule_lock);
+			if (ice_find_rule_entry(hw, ICE_SW_LKUP_MAC,
+						&m_list_itr->fltr_info)) {
+				mutex_unlock(rule_lock);
+				return ICE_ERR_ALREADY_EXISTS;
+			}
+			mutex_unlock(rule_lock);
+			num_unicast++;
+		} else if (is_multicast_ether_addr(add) ||
+			   (is_unicast_ether_addr(add) && hw->ucast_shared)) {
+			m_list_itr->status =
+				ice_add_rule_internal(hw, ICE_SW_LKUP_MAC,
+						      m_list_itr);
+			if (m_list_itr->status)
+				return m_list_itr->status;
+		}
+	}
+
+	mutex_lock(rule_lock);
+	/* Exit if no suitable entries were found for adding bulk switch rule */
+	if (!num_unicast) {
+		status = 0;
+		goto ice_add_mac_exit;
+	}
+
+	rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
+
+	/* Allocate switch rule buffer for the bulk update for unicast */
+	s_rule_size = ICE_SW_RULE_RX_TX_ETH_HDR_SIZE;
+	s_rule = devm_kcalloc(ice_hw_to_dev(hw), num_unicast, s_rule_size,
+			      GFP_KERNEL);
+	if (!s_rule) {
+		status = ICE_ERR_NO_MEMORY;
+		goto ice_add_mac_exit;
+	}
+
+	r_iter = s_rule;
+	list_for_each_entry(m_list_itr, m_list, list_entry) {
+		struct ice_fltr_info *f_info = &m_list_itr->fltr_info;
+		u8 *mac_addr = &f_info->l_data.mac.mac_addr[0];
+
+		if (is_unicast_ether_addr(mac_addr)) {
+			ice_fill_sw_rule(hw, &m_list_itr->fltr_info, r_iter,
+					 ice_aqc_opc_add_sw_rules);
+			r_iter = (struct ice_aqc_sw_rules_elem *)
+				((u8 *)r_iter + s_rule_size);
+		}
+	}
+
+	/* Call AQ bulk switch rule update for all unicast addresses */
+	r_iter = s_rule;
+	/* Call AQ switch rule in AQ_MAX chunk */
+	for (total_elem_left = num_unicast; total_elem_left > 0;
+	     total_elem_left -= elem_sent) {
+		struct ice_aqc_sw_rules_elem *entry = r_iter;
+
+		elem_sent = min_t(u8, total_elem_left,
+				  (ICE_AQ_MAX_BUF_LEN / s_rule_size));
+		status = ice_aq_sw_rules(hw, entry, elem_sent * s_rule_size,
+					 elem_sent, ice_aqc_opc_add_sw_rules,
+					 NULL);
+		if (status)
+			goto ice_add_mac_exit;
+		r_iter = (struct ice_aqc_sw_rules_elem *)
+			((u8 *)r_iter + (elem_sent * s_rule_size));
+	}
+
+	/* Fill up rule ID based on the value returned from FW */
+	r_iter = s_rule;
+	list_for_each_entry(m_list_itr, m_list, list_entry) {
+		struct ice_fltr_info *f_info = &m_list_itr->fltr_info;
+		u8 *mac_addr = &f_info->l_data.mac.mac_addr[0];
+		struct ice_fltr_mgmt_list_entry *fm_entry;
+
+		if (is_unicast_ether_addr(mac_addr)) {
+			f_info->fltr_rule_id =
+				le16_to_cpu(r_iter->pdata.lkup_tx_rx.index);
+			f_info->fltr_act = ICE_FWD_TO_VSI;
+			/* Create an entry to track this MAC address */
+			fm_entry = devm_kzalloc(ice_hw_to_dev(hw),
+						sizeof(*fm_entry), GFP_KERNEL);
+			if (!fm_entry) {
+				status = ICE_ERR_NO_MEMORY;
+				goto ice_add_mac_exit;
+			}
+			fm_entry->fltr_info = *f_info;
+			fm_entry->vsi_count = 1;
+			/* The book keeping entries will get removed when
+			 * base driver calls remove filter AQ command
+			 */
+
+			list_add(&fm_entry->list_entry, rule_head);
+			r_iter = (struct ice_aqc_sw_rules_elem *)
+				((u8 *)r_iter + s_rule_size);
+		}
+	}
+
+ice_add_mac_exit:
+	mutex_unlock(rule_lock);
+	if (s_rule)
+		devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
+/**
+ * ice_add_vlan_internal - Add one VLAN based filter rule
+ * @hw: pointer to the hardware structure
+ * @f_entry: filter entry containing one VLAN information
+ */
+static enum ice_status
+ice_add_vlan_internal(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_mgmt_list_entry *v_list_itr;
+	struct ice_fltr_info *new_fltr, *cur_fltr;
+	enum ice_sw_lkup_type lkup_type;
+	u16 vsi_list_id = 0, vsi_handle;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+	enum ice_status status = 0;
+
+	if (!ice_is_vsi_valid(hw, f_entry->fltr_info.vsi_handle))
+		return ICE_ERR_PARAM;
+
+	f_entry->fltr_info.fwd_id.hw_vsi_id =
+		ice_get_hw_vsi_num(hw, f_entry->fltr_info.vsi_handle);
+	new_fltr = &f_entry->fltr_info;
+
+	/* VLAN ID should only be 12 bits */
+	if (new_fltr->l_data.vlan.vlan_id > ICE_MAX_VLAN_ID)
+		return ICE_ERR_PARAM;
+
+	if (new_fltr->src_id != ICE_SRC_ID_VSI)
+		return ICE_ERR_PARAM;
+
+	new_fltr->src = new_fltr->fwd_id.hw_vsi_id;
+	lkup_type = new_fltr->lkup_type;
+	vsi_handle = new_fltr->vsi_handle;
+	rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
+	mutex_lock(rule_lock);
+	v_list_itr = ice_find_rule_entry(hw, ICE_SW_LKUP_VLAN, new_fltr);
+	if (!v_list_itr) {
+		struct ice_vsi_list_map_info *map_info = NULL;
+
+		if (new_fltr->fltr_act == ICE_FWD_TO_VSI) {
+			/* All VLAN pruning rules use a VSI list. Check if
+			 * there is already a VSI list containing VSI that we
+			 * want to add. If found, use the same vsi_list_id for
+			 * this new VLAN rule or else create a new list.
+			 */
+			map_info = ice_find_vsi_list_entry(hw, ICE_SW_LKUP_VLAN,
+							   vsi_handle,
+							   &vsi_list_id);
+			if (!map_info) {
+				status = ice_create_vsi_list_rule(hw,
+								  &vsi_handle,
+								  1,
+								  &vsi_list_id,
+								  lkup_type);
+				if (status)
+					goto exit;
+			}
+			/* Convert the action to forwarding to a VSI list. */
+			new_fltr->fltr_act = ICE_FWD_TO_VSI_LIST;
+			new_fltr->fwd_id.vsi_list_id = vsi_list_id;
+		}
+
+		status = ice_create_pkt_fwd_rule(hw, f_entry);
+		if (!status) {
+			v_list_itr = ice_find_rule_entry(hw, ICE_SW_LKUP_VLAN,
+							 new_fltr);
+			if (!v_list_itr) {
+				status = ICE_ERR_DOES_NOT_EXIST;
+				goto exit;
+			}
+			/* reuse VSI list for new rule and increment ref_cnt */
+			if (map_info) {
+				v_list_itr->vsi_list_info = map_info;
+				map_info->ref_cnt++;
+			} else {
+				v_list_itr->vsi_list_info =
+					ice_create_vsi_list_map(hw, &vsi_handle,
+								1, vsi_list_id);
+			}
+		}
+	} else if (v_list_itr->vsi_list_info->ref_cnt == 1) {
+		/* Update existing VSI list to add new VSI ID only if it used
+		 * by one VLAN rule.
+		 */
+		cur_fltr = &v_list_itr->fltr_info;
+		status = ice_add_update_vsi_list(hw, v_list_itr, cur_fltr,
+						 new_fltr);
+	} else {
+		/* If VLAN rule exists and VSI list being used by this rule is
+		 * referenced by more than 1 VLAN rule. Then create a new VSI
+		 * list appending previous VSI with new VSI and update existing
+		 * VLAN rule to point to new VSI list ID
+		 */
+		struct ice_fltr_info tmp_fltr;
+		u16 vsi_handle_arr[2];
+		u16 cur_handle;
+
+		/* Current implementation only supports reusing VSI list with
+		 * one VSI count. We should never hit below condition
+		 */
+		if (v_list_itr->vsi_count > 1 &&
+		    v_list_itr->vsi_list_info->ref_cnt > 1) {
+			ice_debug(hw, ICE_DBG_SW,
+				  "Invalid configuration: Optimization to reuse VSI list with more than one VSI is not being done yet\n");
+			status = ICE_ERR_CFG;
+			goto exit;
+		}
+
+		cur_handle =
+			find_first_bit(v_list_itr->vsi_list_info->vsi_map,
+				       ICE_MAX_VSI);
+
+		/* A rule already exists with the new VSI being added */
+		if (cur_handle == vsi_handle) {
+			status = ICE_ERR_ALREADY_EXISTS;
+			goto exit;
+		}
+
+		vsi_handle_arr[0] = cur_handle;
+		vsi_handle_arr[1] = vsi_handle;
+		status = ice_create_vsi_list_rule(hw, &vsi_handle_arr[0], 2,
+						  &vsi_list_id, lkup_type);
+		if (status)
+			goto exit;
+
+		tmp_fltr = v_list_itr->fltr_info;
+		tmp_fltr.fltr_rule_id = v_list_itr->fltr_info.fltr_rule_id;
+		tmp_fltr.fwd_id.vsi_list_id = vsi_list_id;
+		tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST;
+		/* Update the previous switch rule to a new VSI list which
+		 * includes current VSI that is requested
+		 */
+		status = ice_update_pkt_fwd_rule(hw, &tmp_fltr);
+		if (status)
+			goto exit;
+
+		/* before overriding VSI list map info. decrement ref_cnt of
+		 * previous VSI list
+		 */
+		v_list_itr->vsi_list_info->ref_cnt--;
+
+		/* now update to newly created list */
+		v_list_itr->fltr_info.fwd_id.vsi_list_id = vsi_list_id;
+		v_list_itr->vsi_list_info =
+			ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2,
+						vsi_list_id);
+		v_list_itr->vsi_count++;
+	}
+
+exit:
+	mutex_unlock(rule_lock);
+	return status;
+}
+
+/**
+ * ice_add_vlan - Add VLAN based filter rule
+ * @hw: pointer to the hardware structure
+ * @v_list: list of VLAN entries and forwarding information
+ */
+enum ice_status ice_add_vlan(struct ice_hw *hw, struct list_head *v_list)
+{
+	struct ice_fltr_list_entry *v_list_itr;
+
+	if (!v_list || !hw)
+		return ICE_ERR_PARAM;
+
+	list_for_each_entry(v_list_itr, v_list, list_entry) {
+		if (v_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_VLAN)
+			return ICE_ERR_PARAM;
+		v_list_itr->fltr_info.flag = ICE_FLTR_TX;
+		v_list_itr->status = ice_add_vlan_internal(hw, v_list_itr);
+		if (v_list_itr->status)
+			return v_list_itr->status;
+	}
+	return 0;
+}
+
+/**
+ * ice_add_eth_mac - Add ethertype and MAC based filter rule
+ * @hw: pointer to the hardware structure
+ * @em_list: list of ether type MAC filter, MAC is optional
+ *
+ * This function requires the caller to populate the entries in
+ * the filter list with the necessary fields (including flags to
+ * indicate Tx or Rx rules).
+ */
+enum ice_status
+ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list)
+{
+	struct ice_fltr_list_entry *em_list_itr;
+
+	if (!em_list || !hw)
+		return ICE_ERR_PARAM;
+
+	list_for_each_entry(em_list_itr, em_list, list_entry) {
+		enum ice_sw_lkup_type l_type =
+			em_list_itr->fltr_info.lkup_type;
+
+		if (l_type != ICE_SW_LKUP_ETHERTYPE_MAC &&
+		    l_type != ICE_SW_LKUP_ETHERTYPE)
+			return ICE_ERR_PARAM;
+
+		em_list_itr->status = ice_add_rule_internal(hw, l_type,
+							    em_list_itr);
+		if (em_list_itr->status)
+			return em_list_itr->status;
+	}
+	return 0;
+}
+
+/**
+ * ice_remove_eth_mac - Remove an ethertype (or MAC) based filter rule
+ * @hw: pointer to the hardware structure
+ * @em_list: list of ethertype or ethertype MAC entries
+ */
+enum ice_status
+ice_remove_eth_mac(struct ice_hw *hw, struct list_head *em_list)
+{
+	struct ice_fltr_list_entry *em_list_itr, *tmp;
+
+	if (!em_list || !hw)
+		return ICE_ERR_PARAM;
+
+	list_for_each_entry_safe(em_list_itr, tmp, em_list, list_entry) {
+		enum ice_sw_lkup_type l_type =
+			em_list_itr->fltr_info.lkup_type;
+
+		if (l_type != ICE_SW_LKUP_ETHERTYPE_MAC &&
+		    l_type != ICE_SW_LKUP_ETHERTYPE)
+			return ICE_ERR_PARAM;
+
+		em_list_itr->status = ice_remove_rule_internal(hw, l_type,
+							       em_list_itr);
+		if (em_list_itr->status)
+			return em_list_itr->status;
+	}
+	return 0;
+}
+
+/**
+ * ice_rem_sw_rule_info
+ * @hw: pointer to the hardware structure
+ * @rule_head: pointer to the switch list structure that we want to delete
+ */
+static void
+ice_rem_sw_rule_info(struct ice_hw *hw, struct list_head *rule_head)
+{
+	if (!list_empty(rule_head)) {
+		struct ice_fltr_mgmt_list_entry *entry;
+		struct ice_fltr_mgmt_list_entry *tmp;
+
+		list_for_each_entry_safe(entry, tmp, rule_head, list_entry) {
+			list_del(&entry->list_entry);
+			devm_kfree(ice_hw_to_dev(hw), entry);
+		}
+	}
+}
+
+/**
+ * ice_cfg_dflt_vsi - change state of VSI to set/clear default
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to set as default
+ * @set: true to add the above mentioned switch rule, false to remove it
+ * @direction: ICE_FLTR_RX or ICE_FLTR_TX
+ *
+ * add filter rule to set/unset given VSI as default VSI for the switch
+ * (represented by swid)
+ */
+enum ice_status
+ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_handle, bool set, u8 direction)
+{
+	struct ice_aqc_sw_rules_elem *s_rule;
+	struct ice_fltr_info f_info;
+	enum ice_adminq_opc opcode;
+	enum ice_status status;
+	u16 s_rule_size;
+	u16 hw_vsi_id;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+	hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+
+	s_rule_size = set ? ICE_SW_RULE_RX_TX_ETH_HDR_SIZE :
+		ICE_SW_RULE_RX_TX_NO_HDR_SIZE;
+
+	s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL);
+	if (!s_rule)
+		return ICE_ERR_NO_MEMORY;
+
+	memset(&f_info, 0, sizeof(f_info));
+
+	f_info.lkup_type = ICE_SW_LKUP_DFLT;
+	f_info.flag = direction;
+	f_info.fltr_act = ICE_FWD_TO_VSI;
+	f_info.fwd_id.hw_vsi_id = hw_vsi_id;
+
+	if (f_info.flag & ICE_FLTR_RX) {
+		f_info.src = hw->port_info->lport;
+		f_info.src_id = ICE_SRC_ID_LPORT;
+		if (!set)
+			f_info.fltr_rule_id =
+				hw->port_info->dflt_rx_vsi_rule_id;
+	} else if (f_info.flag & ICE_FLTR_TX) {
+		f_info.src_id = ICE_SRC_ID_VSI;
+		f_info.src = hw_vsi_id;
+		if (!set)
+			f_info.fltr_rule_id =
+				hw->port_info->dflt_tx_vsi_rule_id;
+	}
+
+	if (set)
+		opcode = ice_aqc_opc_add_sw_rules;
+	else
+		opcode = ice_aqc_opc_remove_sw_rules;
+
+	ice_fill_sw_rule(hw, &f_info, s_rule, opcode);
+
+	status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1, opcode, NULL);
+	if (status || !(f_info.flag & ICE_FLTR_TX_RX))
+		goto out;
+	if (set) {
+		u16 index = le16_to_cpu(s_rule->pdata.lkup_tx_rx.index);
+
+		if (f_info.flag & ICE_FLTR_TX) {
+			hw->port_info->dflt_tx_vsi_num = hw_vsi_id;
+			hw->port_info->dflt_tx_vsi_rule_id = index;
+		} else if (f_info.flag & ICE_FLTR_RX) {
+			hw->port_info->dflt_rx_vsi_num = hw_vsi_id;
+			hw->port_info->dflt_rx_vsi_rule_id = index;
+		}
+	} else {
+		if (f_info.flag & ICE_FLTR_TX) {
+			hw->port_info->dflt_tx_vsi_num = ICE_DFLT_VSI_INVAL;
+			hw->port_info->dflt_tx_vsi_rule_id = ICE_INVAL_ACT;
+		} else if (f_info.flag & ICE_FLTR_RX) {
+			hw->port_info->dflt_rx_vsi_num = ICE_DFLT_VSI_INVAL;
+			hw->port_info->dflt_rx_vsi_rule_id = ICE_INVAL_ACT;
+		}
+	}
+
+out:
+	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
+/**
+ * ice_find_ucast_rule_entry - Search for a unicast MAC filter rule entry
+ * @hw: pointer to the hardware structure
+ * @recp_id: lookup type for which the specified rule needs to be searched
+ * @f_info: rule information
+ *
+ * Helper function to search for a unicast rule entry - this is to be used
+ * to remove unicast MAC filter that is not shared with other VSIs on the
+ * PF switch.
+ *
+ * Returns pointer to entry storing the rule if found
+ */
+static struct ice_fltr_mgmt_list_entry *
+ice_find_ucast_rule_entry(struct ice_hw *hw, u8 recp_id,
+			  struct ice_fltr_info *f_info)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_mgmt_list_entry *list_itr;
+	struct list_head *list_head;
+
+	list_head = &sw->recp_list[recp_id].filt_rules;
+	list_for_each_entry(list_itr, list_head, list_entry) {
+		if (!memcmp(&f_info->l_data, &list_itr->fltr_info.l_data,
+			    sizeof(f_info->l_data)) &&
+		    f_info->fwd_id.hw_vsi_id ==
+		    list_itr->fltr_info.fwd_id.hw_vsi_id &&
+		    f_info->flag == list_itr->fltr_info.flag)
+			return list_itr;
+	}
+	return NULL;
+}
+
+/**
+ * ice_remove_mac - remove a MAC address based filter rule
+ * @hw: pointer to the hardware structure
+ * @m_list: list of MAC addresses and forwarding information
+ *
+ * This function removes either a MAC filter rule or a specific VSI from a
+ * VSI list for a multicast MAC address.
+ *
+ * Returns ICE_ERR_DOES_NOT_EXIST if a given entry was not added by
+ * ice_add_mac. Caller should be aware that this call will only work if all
+ * the entries passed into m_list were added previously. It will not attempt to
+ * do a partial remove of entries that were found.
+ */
+enum ice_status ice_remove_mac(struct ice_hw *hw, struct list_head *m_list)
+{
+	struct ice_fltr_list_entry *list_itr, *tmp;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+
+	if (!m_list)
+		return ICE_ERR_PARAM;
+
+	rule_lock = &hw->switch_info->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
+	list_for_each_entry_safe(list_itr, tmp, m_list, list_entry) {
+		enum ice_sw_lkup_type l_type = list_itr->fltr_info.lkup_type;
+		u8 *add = &list_itr->fltr_info.l_data.mac.mac_addr[0];
+		u16 vsi_handle;
+
+		if (l_type != ICE_SW_LKUP_MAC)
+			return ICE_ERR_PARAM;
+
+		vsi_handle = list_itr->fltr_info.vsi_handle;
+		if (!ice_is_vsi_valid(hw, vsi_handle))
+			return ICE_ERR_PARAM;
+
+		list_itr->fltr_info.fwd_id.hw_vsi_id =
+					ice_get_hw_vsi_num(hw, vsi_handle);
+		if (is_unicast_ether_addr(add) && !hw->ucast_shared) {
+			/* Don't remove the unicast address that belongs to
+			 * another VSI on the switch, since it is not being
+			 * shared...
+			 */
+			mutex_lock(rule_lock);
+			if (!ice_find_ucast_rule_entry(hw, ICE_SW_LKUP_MAC,
+						       &list_itr->fltr_info)) {
+				mutex_unlock(rule_lock);
+				return ICE_ERR_DOES_NOT_EXIST;
+			}
+			mutex_unlock(rule_lock);
+		}
+		list_itr->status = ice_remove_rule_internal(hw,
+							    ICE_SW_LKUP_MAC,
+							    list_itr);
+		if (list_itr->status)
+			return list_itr->status;
+	}
+	return 0;
+}
+
+/**
+ * ice_remove_vlan - Remove VLAN based filter rule
+ * @hw: pointer to the hardware structure
+ * @v_list: list of VLAN entries and forwarding information
+ */
+enum ice_status
+ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list)
+{
+	struct ice_fltr_list_entry *v_list_itr, *tmp;
+
+	if (!v_list || !hw)
+		return ICE_ERR_PARAM;
+
+	list_for_each_entry_safe(v_list_itr, tmp, v_list, list_entry) {
+		enum ice_sw_lkup_type l_type = v_list_itr->fltr_info.lkup_type;
+
+		if (l_type != ICE_SW_LKUP_VLAN)
+			return ICE_ERR_PARAM;
+		v_list_itr->status = ice_remove_rule_internal(hw,
+							      ICE_SW_LKUP_VLAN,
+							      v_list_itr);
+		if (v_list_itr->status)
+			return v_list_itr->status;
+	}
+	return 0;
+}
+
+/**
+ * ice_vsi_uses_fltr - Determine if given VSI uses specified filter
+ * @fm_entry: filter entry to inspect
+ * @vsi_handle: VSI handle to compare with filter info
+ */
+static bool
+ice_vsi_uses_fltr(struct ice_fltr_mgmt_list_entry *fm_entry, u16 vsi_handle)
+{
+	return ((fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI &&
+		 fm_entry->fltr_info.vsi_handle == vsi_handle) ||
+		(fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI_LIST &&
+		 fm_entry->vsi_list_info &&
+		 (test_bit(vsi_handle, fm_entry->vsi_list_info->vsi_map))));
+}
+
+/**
+ * ice_add_entry_to_vsi_fltr_list - Add copy of fltr_list_entry to remove list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to remove filters from
+ * @vsi_list_head: pointer to the list to add entry to
+ * @fi: pointer to fltr_info of filter entry to copy & add
+ *
+ * Helper function, used when creating a list of filters to remove from
+ * a specific VSI. The entry added to vsi_list_head is a COPY of the
+ * original filter entry, with the exception of fltr_info.fltr_act and
+ * fltr_info.fwd_id fields. These are set such that later logic can
+ * extract which VSI to remove the fltr from, and pass on that information.
+ */
+static enum ice_status
+ice_add_entry_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_handle,
+			       struct list_head *vsi_list_head,
+			       struct ice_fltr_info *fi)
+{
+	struct ice_fltr_list_entry *tmp;
+
+	/* this memory is freed up in the caller function
+	 * once filters for this VSI are removed
+	 */
+	tmp = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return ICE_ERR_NO_MEMORY;
+
+	tmp->fltr_info = *fi;
+
+	/* Overwrite these fields to indicate which VSI to remove filter from,
+	 * so find and remove logic can extract the information from the
+	 * list entries. Note that original entries will still have proper
+	 * values.
+	 */
+	tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+	tmp->fltr_info.vsi_handle = vsi_handle;
+	tmp->fltr_info.fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+
+	list_add(&tmp->list_entry, vsi_list_head);
+
+	return 0;
+}
+
+/**
+ * ice_add_to_vsi_fltr_list - Add VSI filters to the list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to remove filters from
+ * @lkup_list_head: pointer to the list that has certain lookup type filters
+ * @vsi_list_head: pointer to the list pertaining to VSI with vsi_handle
+ *
+ * Locates all filters in lkup_list_head that are used by the given VSI,
+ * and adds COPIES of those entries to vsi_list_head (intended to be used
+ * to remove the listed filters).
+ * Note that this means all entries in vsi_list_head must be explicitly
+ * deallocated by the caller when done with list.
+ */
+static enum ice_status
+ice_add_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_handle,
+			 struct list_head *lkup_list_head,
+			 struct list_head *vsi_list_head)
+{
+	struct ice_fltr_mgmt_list_entry *fm_entry;
+	enum ice_status status = 0;
+
+	/* check to make sure VSI ID is valid and within boundary */
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+
+	list_for_each_entry(fm_entry, lkup_list_head, list_entry) {
+		if (!ice_vsi_uses_fltr(fm_entry, vsi_handle))
+			continue;
+
+		status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle,
+							vsi_list_head,
+							&fm_entry->fltr_info);
+		if (status)
+			return status;
+	}
+	return status;
+}
+
+/**
+ * ice_determine_promisc_mask
+ * @fi: filter info to parse
+ *
+ * Helper function to determine which ICE_PROMISC_ mask corresponds
+ * to given filter into.
+ */
+static u8 ice_determine_promisc_mask(struct ice_fltr_info *fi)
+{
+	u16 vid = fi->l_data.mac_vlan.vlan_id;
+	u8 *macaddr = fi->l_data.mac.mac_addr;
+	bool is_tx_fltr = false;
+	u8 promisc_mask = 0;
+
+	if (fi->flag == ICE_FLTR_TX)
+		is_tx_fltr = true;
+
+	if (is_broadcast_ether_addr(macaddr))
+		promisc_mask |= is_tx_fltr ?
+			ICE_PROMISC_BCAST_TX : ICE_PROMISC_BCAST_RX;
+	else if (is_multicast_ether_addr(macaddr))
+		promisc_mask |= is_tx_fltr ?
+			ICE_PROMISC_MCAST_TX : ICE_PROMISC_MCAST_RX;
+	else if (is_unicast_ether_addr(macaddr))
+		promisc_mask |= is_tx_fltr ?
+			ICE_PROMISC_UCAST_TX : ICE_PROMISC_UCAST_RX;
+	if (vid)
+		promisc_mask |= is_tx_fltr ?
+			ICE_PROMISC_VLAN_TX : ICE_PROMISC_VLAN_RX;
+
+	return promisc_mask;
+}
+
+/**
+ * ice_remove_promisc - Remove promisc based filter rules
+ * @hw: pointer to the hardware structure
+ * @recp_id: recipe ID for which the rule needs to removed
+ * @v_list: list of promisc entries
+ */
+static enum ice_status
+ice_remove_promisc(struct ice_hw *hw, u8 recp_id,
+		   struct list_head *v_list)
+{
+	struct ice_fltr_list_entry *v_list_itr, *tmp;
+
+	list_for_each_entry_safe(v_list_itr, tmp, v_list, list_entry) {
+		v_list_itr->status =
+			ice_remove_rule_internal(hw, recp_id, v_list_itr);
+		if (v_list_itr->status)
+			return v_list_itr->status;
+	}
+	return 0;
+}
+
+/**
+ * ice_clear_vsi_promisc - clear specified promiscuous mode(s) for given VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to clear mode
+ * @promisc_mask: mask of promiscuous config bits to clear
+ * @vid: VLAN ID to clear VLAN promiscuous
+ */
+enum ice_status
+ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+		      u16 vid)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_list_entry *fm_entry, *tmp;
+	struct list_head remove_list_head;
+	struct ice_fltr_mgmt_list_entry *itr;
+	struct list_head *rule_head;
+	struct mutex *rule_lock;	/* Lock to protect filter rule list */
+	enum ice_status status = 0;
+	u8 recipe_id;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+
+	if (promisc_mask & (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX))
+		recipe_id = ICE_SW_LKUP_PROMISC_VLAN;
+	else
+		recipe_id = ICE_SW_LKUP_PROMISC;
+
+	rule_head = &sw->recp_list[recipe_id].filt_rules;
+	rule_lock = &sw->recp_list[recipe_id].filt_rule_lock;
+
+	INIT_LIST_HEAD(&remove_list_head);
+
+	mutex_lock(rule_lock);
+	list_for_each_entry(itr, rule_head, list_entry) {
+		struct ice_fltr_info *fltr_info;
+		u8 fltr_promisc_mask = 0;
+
+		if (!ice_vsi_uses_fltr(itr, vsi_handle))
+			continue;
+		fltr_info = &itr->fltr_info;
+
+		if (recipe_id == ICE_SW_LKUP_PROMISC_VLAN &&
+		    vid != fltr_info->l_data.mac_vlan.vlan_id)
+			continue;
+
+		fltr_promisc_mask |= ice_determine_promisc_mask(fltr_info);
+
+		/* Skip if filter is not completely specified by given mask */
+		if (fltr_promisc_mask & ~promisc_mask)
+			continue;
+
+		status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle,
+							&remove_list_head,
+							fltr_info);
+		if (status) {
+			mutex_unlock(rule_lock);
+			goto free_fltr_list;
+		}
+	}
+	mutex_unlock(rule_lock);
+
+	status = ice_remove_promisc(hw, recipe_id, &remove_list_head);
+
+free_fltr_list:
+	list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) {
+		list_del(&fm_entry->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), fm_entry);
+	}
+
+	return status;
+}
+
+/**
+ * ice_set_vsi_promisc - set given VSI to given promiscuous mode(s)
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to configure
+ * @promisc_mask: mask of promiscuous config bits
+ * @vid: VLAN ID to set VLAN promiscuous
+ */
+enum ice_status
+ice_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, u16 vid)
+{
+	enum { UCAST_FLTR = 1, MCAST_FLTR, BCAST_FLTR };
+	struct ice_fltr_list_entry f_list_entry;
+	struct ice_fltr_info new_fltr;
+	enum ice_status status = 0;
+	bool is_tx_fltr;
+	u16 hw_vsi_id;
+	int pkt_type;
+	u8 recipe_id;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+	hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+
+	memset(&new_fltr, 0, sizeof(new_fltr));
+
+	if (promisc_mask & (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX)) {
+		new_fltr.lkup_type = ICE_SW_LKUP_PROMISC_VLAN;
+		new_fltr.l_data.mac_vlan.vlan_id = vid;
+		recipe_id = ICE_SW_LKUP_PROMISC_VLAN;
+	} else {
+		new_fltr.lkup_type = ICE_SW_LKUP_PROMISC;
+		recipe_id = ICE_SW_LKUP_PROMISC;
+	}
+
+	/* Separate filters must be set for each direction/packet type
+	 * combination, so we will loop over the mask value, store the
+	 * individual type, and clear it out in the input mask as it
+	 * is found.
+	 */
+	while (promisc_mask) {
+		u8 *mac_addr;
+
+		pkt_type = 0;
+		is_tx_fltr = false;
+
+		if (promisc_mask & ICE_PROMISC_UCAST_RX) {
+			promisc_mask &= ~ICE_PROMISC_UCAST_RX;
+			pkt_type = UCAST_FLTR;
+		} else if (promisc_mask & ICE_PROMISC_UCAST_TX) {
+			promisc_mask &= ~ICE_PROMISC_UCAST_TX;
+			pkt_type = UCAST_FLTR;
+			is_tx_fltr = true;
+		} else if (promisc_mask & ICE_PROMISC_MCAST_RX) {
+			promisc_mask &= ~ICE_PROMISC_MCAST_RX;
+			pkt_type = MCAST_FLTR;
+		} else if (promisc_mask & ICE_PROMISC_MCAST_TX) {
+			promisc_mask &= ~ICE_PROMISC_MCAST_TX;
+			pkt_type = MCAST_FLTR;
+			is_tx_fltr = true;
+		} else if (promisc_mask & ICE_PROMISC_BCAST_RX) {
+			promisc_mask &= ~ICE_PROMISC_BCAST_RX;
+			pkt_type = BCAST_FLTR;
+		} else if (promisc_mask & ICE_PROMISC_BCAST_TX) {
+			promisc_mask &= ~ICE_PROMISC_BCAST_TX;
+			pkt_type = BCAST_FLTR;
+			is_tx_fltr = true;
+		}
+
+		/* Check for VLAN promiscuous flag */
+		if (promisc_mask & ICE_PROMISC_VLAN_RX) {
+			promisc_mask &= ~ICE_PROMISC_VLAN_RX;
+		} else if (promisc_mask & ICE_PROMISC_VLAN_TX) {
+			promisc_mask &= ~ICE_PROMISC_VLAN_TX;
+			is_tx_fltr = true;
+		}
+
+		/* Set filter DA based on packet type */
+		mac_addr = new_fltr.l_data.mac.mac_addr;
+		if (pkt_type == BCAST_FLTR) {
+			eth_broadcast_addr(mac_addr);
+		} else if (pkt_type == MCAST_FLTR ||
+			   pkt_type == UCAST_FLTR) {
+			/* Use the dummy ether header DA */
+			ether_addr_copy(mac_addr, dummy_eth_header);
+			if (pkt_type == MCAST_FLTR)
+				mac_addr[0] |= 0x1;	/* Set multicast bit */
+		}
+
+		/* Need to reset this to zero for all iterations */
+		new_fltr.flag = 0;
+		if (is_tx_fltr) {
+			new_fltr.flag |= ICE_FLTR_TX;
+			new_fltr.src = hw_vsi_id;
+		} else {
+			new_fltr.flag |= ICE_FLTR_RX;
+			new_fltr.src = hw->port_info->lport;
+		}
+
+		new_fltr.fltr_act = ICE_FWD_TO_VSI;
+		new_fltr.vsi_handle = vsi_handle;
+		new_fltr.fwd_id.hw_vsi_id = hw_vsi_id;
+		f_list_entry.fltr_info = new_fltr;
+
+		status = ice_add_rule_internal(hw, recipe_id, &f_list_entry);
+		if (status)
+			goto set_promisc_exit;
+	}
+
+set_promisc_exit:
+	return status;
+}
+
+/**
+ * ice_set_vlan_vsi_promisc
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to configure
+ * @promisc_mask: mask of promiscuous config bits
+ * @rm_vlan_promisc: Clear VLANs VSI promisc mode
+ *
+ * Configure VSI with all associated VLANs to given promiscuous mode(s)
+ */
+enum ice_status
+ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+			 bool rm_vlan_promisc)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_list_entry *list_itr, *tmp;
+	struct list_head vsi_list_head;
+	struct list_head *vlan_head;
+	struct mutex *vlan_lock; /* Lock to protect filter rule list */
+	enum ice_status status;
+	u16 vlan_id;
+
+	INIT_LIST_HEAD(&vsi_list_head);
+	vlan_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
+	vlan_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules;
+	mutex_lock(vlan_lock);
+	status = ice_add_to_vsi_fltr_list(hw, vsi_handle, vlan_head,
+					  &vsi_list_head);
+	mutex_unlock(vlan_lock);
+	if (status)
+		goto free_fltr_list;
+
+	list_for_each_entry(list_itr, &vsi_list_head, list_entry) {
+		vlan_id = list_itr->fltr_info.l_data.vlan.vlan_id;
+		if (rm_vlan_promisc)
+			status = ice_clear_vsi_promisc(hw, vsi_handle,
+						       promisc_mask, vlan_id);
+		else
+			status = ice_set_vsi_promisc(hw, vsi_handle,
+						     promisc_mask, vlan_id);
+		if (status && status != -EEXIST)
+			break;
+	}
+
+free_fltr_list:
+	list_for_each_entry_safe(list_itr, tmp, &vsi_list_head, list_entry) {
+		list_del(&list_itr->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), list_itr);
+	}
+	return status;
+}
+
+/**
+ * ice_remove_vsi_lkup_fltr - Remove lookup type filters for a VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to remove filters from
+ * @lkup: switch rule filter lookup type
+ */
+static void
+ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_handle,
+			 enum ice_sw_lkup_type lkup)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_list_entry *fm_entry;
+	struct list_head remove_list_head;
+	struct list_head *rule_head;
+	struct ice_fltr_list_entry *tmp;
+	struct mutex *rule_lock;	/* Lock to protect filter rule list */
+	enum ice_status status;
+
+	INIT_LIST_HEAD(&remove_list_head);
+	rule_lock = &sw->recp_list[lkup].filt_rule_lock;
+	rule_head = &sw->recp_list[lkup].filt_rules;
+	mutex_lock(rule_lock);
+	status = ice_add_to_vsi_fltr_list(hw, vsi_handle, rule_head,
+					  &remove_list_head);
+	mutex_unlock(rule_lock);
+	if (status)
+		goto free_fltr_list;
+
+	switch (lkup) {
+	case ICE_SW_LKUP_MAC:
+		ice_remove_mac(hw, &remove_list_head);
+		break;
+	case ICE_SW_LKUP_VLAN:
+		ice_remove_vlan(hw, &remove_list_head);
+		break;
+	case ICE_SW_LKUP_PROMISC:
+	case ICE_SW_LKUP_PROMISC_VLAN:
+		ice_remove_promisc(hw, lkup, &remove_list_head);
+		break;
+	case ICE_SW_LKUP_MAC_VLAN:
+	case ICE_SW_LKUP_ETHERTYPE:
+	case ICE_SW_LKUP_ETHERTYPE_MAC:
+	case ICE_SW_LKUP_DFLT:
+	case ICE_SW_LKUP_LAST:
+	default:
+		ice_debug(hw, ICE_DBG_SW, "Unsupported lookup type %d\n", lkup);
+		break;
+	}
+
+free_fltr_list:
+	list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) {
+		list_del(&fm_entry->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), fm_entry);
+	}
+}
+
+/**
+ * ice_remove_vsi_fltr - Remove all filters for a VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to remove filters from
+ */
+void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_handle)
+{
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_MAC);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_MAC_VLAN);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_PROMISC);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_VLAN);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_DFLT);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_ETHERTYPE);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_ETHERTYPE_MAC);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_PROMISC_VLAN);
+}
+
+/**
+ * ice_alloc_res_cntr - allocating resource counter
+ * @hw: pointer to the hardware structure
+ * @type: type of resource
+ * @alloc_shared: if set it is shared else dedicated
+ * @num_items: number of entries requested for FD resource type
+ * @counter_id: counter index returned by AQ call
+ */
+enum ice_status
+ice_alloc_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
+		   u16 *counter_id)
+{
+	struct ice_aqc_alloc_free_res_elem *buf;
+	enum ice_status status;
+	u16 buf_len;
+
+	/* Allocate resource */
+	buf_len = struct_size(buf, elem, 1);
+	buf = kzalloc(buf_len, GFP_KERNEL);
+	if (!buf)
+		return ICE_ERR_NO_MEMORY;
+
+	buf->num_elems = cpu_to_le16(num_items);
+	buf->res_type = cpu_to_le16(((type << ICE_AQC_RES_TYPE_S) &
+				      ICE_AQC_RES_TYPE_M) | alloc_shared);
+
+	status = ice_aq_alloc_free_res(hw, 1, buf, buf_len,
+				       ice_aqc_opc_alloc_res, NULL);
+	if (status)
+		goto exit;
+
+	*counter_id = le16_to_cpu(buf->elem[0].e.sw_resp);
+
+exit:
+	kfree(buf);
+	return status;
+}
+
+/**
+ * ice_free_res_cntr - free resource counter
+ * @hw: pointer to the hardware structure
+ * @type: type of resource
+ * @alloc_shared: if set it is shared else dedicated
+ * @num_items: number of entries to be freed for FD resource type
+ * @counter_id: counter ID resource which needs to be freed
+ */
+enum ice_status
+ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
+		  u16 counter_id)
+{
+	struct ice_aqc_alloc_free_res_elem *buf;
+	enum ice_status status;
+	u16 buf_len;
+
+	/* Free resource */
+	buf_len = struct_size(buf, elem, 1);
+	buf = kzalloc(buf_len, GFP_KERNEL);
+	if (!buf)
+		return ICE_ERR_NO_MEMORY;
+
+	buf->num_elems = cpu_to_le16(num_items);
+	buf->res_type = cpu_to_le16(((type << ICE_AQC_RES_TYPE_S) &
+				      ICE_AQC_RES_TYPE_M) | alloc_shared);
+	buf->elem[0].e.sw_resp = cpu_to_le16(counter_id);
+
+	status = ice_aq_alloc_free_res(hw, 1, buf, buf_len,
+				       ice_aqc_opc_free_res, NULL);
+	if (status)
+		ice_debug(hw, ICE_DBG_SW,
+			  "counter resource could not be freed\n");
+
+	kfree(buf);
+	return status;
+}
+
+/**
+ * ice_replay_vsi_fltr - Replay filters for requested VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: driver VSI handle
+ * @recp_id: Recipe ID for which rules need to be replayed
+ * @list_head: list for which filters need to be replayed
+ *
+ * Replays the filter of recipe recp_id for a VSI represented via vsi_handle.
+ * It is required to pass valid VSI handle.
+ */
+static enum ice_status
+ice_replay_vsi_fltr(struct ice_hw *hw, u16 vsi_handle, u8 recp_id,
+		    struct list_head *list_head)
+{
+	struct ice_fltr_mgmt_list_entry *itr;
+	enum ice_status status = 0;
+	u16 hw_vsi_id;
+
+	if (list_empty(list_head))
+		return status;
+	hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+
+	list_for_each_entry(itr, list_head, list_entry) {
+		struct ice_fltr_list_entry f_entry;
+
+		f_entry.fltr_info = itr->fltr_info;
+		if (itr->vsi_count < 2 && recp_id != ICE_SW_LKUP_VLAN &&
+		    itr->fltr_info.vsi_handle == vsi_handle) {
+			/* update the src in case it is VSI num */
+			if (f_entry.fltr_info.src_id == ICE_SRC_ID_VSI)
+				f_entry.fltr_info.src = hw_vsi_id;
+			status = ice_add_rule_internal(hw, recp_id, &f_entry);
+			if (status)
+				goto end;
+			continue;
+		}
+		if (!itr->vsi_list_info ||
+		    !test_bit(vsi_handle, itr->vsi_list_info->vsi_map))
+			continue;
+		/* Clearing it so that the logic can add it back */
+		clear_bit(vsi_handle, itr->vsi_list_info->vsi_map);
+		f_entry.fltr_info.vsi_handle = vsi_handle;
+		f_entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
+		/* update the src in case it is VSI num */
+		if (f_entry.fltr_info.src_id == ICE_SRC_ID_VSI)
+			f_entry.fltr_info.src = hw_vsi_id;
+		if (recp_id == ICE_SW_LKUP_VLAN)
+			status = ice_add_vlan_internal(hw, &f_entry);
+		else
+			status = ice_add_rule_internal(hw, recp_id, &f_entry);
+		if (status)
+			goto end;
+	}
+end:
+	return status;
+}
+
+/**
+ * ice_replay_vsi_all_fltr - replay all filters stored in bookkeeping lists
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: driver VSI handle
+ *
+ * Replays filters for requested VSI via vsi_handle.
+ */
+enum ice_status ice_replay_vsi_all_fltr(struct ice_hw *hw, u16 vsi_handle)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	enum ice_status status = 0;
+	u8 i;
+
+	for (i = 0; i < ICE_SW_LKUP_LAST; i++) {
+		struct list_head *head;
+
+		head = &sw->recp_list[i].filt_replay_rules;
+		status = ice_replay_vsi_fltr(hw, vsi_handle, i, head);
+		if (status)
+			return status;
+	}
+	return status;
+}
+
+/**
+ * ice_rm_all_sw_replay_rule_info - deletes filter replay rules
+ * @hw: pointer to the HW struct
+ *
+ * Deletes the filter replay rules.
+ */
+void ice_rm_all_sw_replay_rule_info(struct ice_hw *hw)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	u8 i;
+
+	if (!sw)
+		return;
+
+	for (i = 0; i < ICE_SW_LKUP_LAST; i++) {
+		if (!list_empty(&sw->recp_list[i].filt_replay_rules)) {
+			struct list_head *l_head;
+
+			l_head = &sw->recp_list[i].filt_replay_rules;
+			ice_rem_sw_rule_info(hw, l_head);
+		}
+	}
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
new file mode 100644
index 000000000..8b4f9d35c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -0,0 +1,251 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_SWITCH_H_
+#define _ICE_SWITCH_H_
+
+#include "ice_common.h"
+
+#define ICE_SW_CFG_MAX_BUF_LEN 2048
+#define ICE_DFLT_VSI_INVAL 0xff
+#define ICE_FLTR_RX BIT(0)
+#define ICE_FLTR_TX BIT(1)
+#define ICE_FLTR_TX_RX (ICE_FLTR_RX | ICE_FLTR_TX)
+#define ICE_VSI_INVAL_ID 0xffff
+#define ICE_INVAL_Q_HANDLE 0xFFFF
+
+/* VSI context structure for add/get/update/free operations */
+struct ice_vsi_ctx {
+	u16 vsi_num;
+	u16 vsis_allocd;
+	u16 vsis_unallocated;
+	u16 flags;
+	struct ice_aqc_vsi_props info;
+	struct ice_sched_vsi_info sched;
+	u8 alloc_from_pool;
+	u8 vf_num;
+	u16 num_lan_q_entries[ICE_MAX_TRAFFIC_CLASS];
+	struct ice_q_ctx *lan_q_ctx[ICE_MAX_TRAFFIC_CLASS];
+};
+
+enum ice_sw_fwd_act_type {
+	ICE_FWD_TO_VSI = 0,
+	ICE_FWD_TO_VSI_LIST, /* Do not use this when adding filter */
+	ICE_FWD_TO_Q,
+	ICE_FWD_TO_QGRP,
+	ICE_DROP_PACKET,
+	ICE_INVAL_ACT
+};
+
+/* Switch recipe ID enum values are specific to hardware */
+enum ice_sw_lkup_type {
+	ICE_SW_LKUP_ETHERTYPE = 0,
+	ICE_SW_LKUP_MAC = 1,
+	ICE_SW_LKUP_MAC_VLAN = 2,
+	ICE_SW_LKUP_PROMISC = 3,
+	ICE_SW_LKUP_VLAN = 4,
+	ICE_SW_LKUP_DFLT = 5,
+	ICE_SW_LKUP_ETHERTYPE_MAC = 8,
+	ICE_SW_LKUP_PROMISC_VLAN = 9,
+	ICE_SW_LKUP_LAST
+};
+
+/* type of filter src ID */
+enum ice_src_id {
+	ICE_SRC_ID_UNKNOWN = 0,
+	ICE_SRC_ID_VSI,
+	ICE_SRC_ID_QUEUE,
+	ICE_SRC_ID_LPORT,
+};
+
+struct ice_fltr_info {
+	/* Look up information: how to look up packet */
+	enum ice_sw_lkup_type lkup_type;
+	/* Forward action: filter action to do after lookup */
+	enum ice_sw_fwd_act_type fltr_act;
+	/* rule ID returned by firmware once filter rule is created */
+	u16 fltr_rule_id;
+	u16 flag;
+
+	/* Source VSI for LOOKUP_TX or source port for LOOKUP_RX */
+	u16 src;
+	enum ice_src_id src_id;
+
+	union {
+		struct {
+			u8 mac_addr[ETH_ALEN];
+		} mac;
+		struct {
+			u8 mac_addr[ETH_ALEN];
+			u16 vlan_id;
+		} mac_vlan;
+		struct {
+			u16 vlan_id;
+		} vlan;
+		/* Set lkup_type as ICE_SW_LKUP_ETHERTYPE
+		 * if just using ethertype as filter. Set lkup_type as
+		 * ICE_SW_LKUP_ETHERTYPE_MAC if MAC also needs to be
+		 * passed in as filter.
+		 */
+		struct {
+			u16 ethertype;
+			u8 mac_addr[ETH_ALEN]; /* optional */
+		} ethertype_mac;
+	} l_data; /* Make sure to zero out the memory of l_data before using
+		   * it or only set the data associated with lookup match
+		   * rest everything should be zero
+		   */
+
+	/* Depending on filter action */
+	union {
+		/* queue ID in case of ICE_FWD_TO_Q and starting
+		 * queue ID in case of ICE_FWD_TO_QGRP.
+		 */
+		u16 q_id:11;
+		u16 hw_vsi_id:10;
+		u16 vsi_list_id:10;
+	} fwd_id;
+
+	/* Sw VSI handle */
+	u16 vsi_handle;
+
+	/* Set to num_queues if action is ICE_FWD_TO_QGRP. This field
+	 * determines the range of queues the packet needs to be forwarded to.
+	 * Note that qgrp_size must be set to a power of 2.
+	 */
+	u8 qgrp_size;
+
+	/* Rule creations populate these indicators basing on the switch type */
+	u8 lb_en;	/* Indicate if packet can be looped back */
+	u8 lan_en;	/* Indicate if packet can be forwarded to the uplink */
+};
+
+struct ice_sw_recipe {
+	struct list_head l_entry;
+
+	/* To protect modification of filt_rule list
+	 * defined below
+	 */
+	struct mutex filt_rule_lock;
+
+	/* List of type ice_fltr_mgmt_list_entry */
+	struct list_head filt_rules;
+	struct list_head filt_replay_rules;
+
+	/* linked list of type recipe_list_entry */
+	struct list_head rg_list;
+	/* linked list of type ice_sw_fv_list_entry*/
+	struct list_head fv_list;
+	struct ice_aqc_recipe_data_elem *r_buf;
+	u8 recp_count;
+	u8 root_rid;
+	u8 num_profs;
+	u8 *prof_ids;
+
+	/* recipe bitmap: what all recipes makes this recipe */
+	DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
+};
+
+/* Bookkeeping structure to hold bitmap of VSIs corresponding to VSI list ID */
+struct ice_vsi_list_map_info {
+	struct list_head list_entry;
+	DECLARE_BITMAP(vsi_map, ICE_MAX_VSI);
+	u16 vsi_list_id;
+	/* counter to track how many rules are reusing this VSI list */
+	u16 ref_cnt;
+};
+
+struct ice_fltr_list_entry {
+	struct list_head list_entry;
+	enum ice_status status;
+	struct ice_fltr_info fltr_info;
+};
+
+/* This defines an entry in the list that maintains MAC or VLAN membership
+ * to HW list mapping, since multiple VSIs can subscribe to the same MAC or
+ * VLAN. As an optimization the VSI list should be created only when a
+ * second VSI becomes a subscriber to the same MAC address. VSI lists are always
+ * used for VLAN membership.
+ */
+struct ice_fltr_mgmt_list_entry {
+	/* back pointer to VSI list ID to VSI list mapping */
+	struct ice_vsi_list_map_info *vsi_list_info;
+	u16 vsi_count;
+#define ICE_INVAL_LG_ACT_INDEX 0xffff
+	u16 lg_act_idx;
+#define ICE_INVAL_SW_MARKER_ID 0xffff
+	u16 sw_marker_id;
+	struct list_head list_entry;
+	struct ice_fltr_info fltr_info;
+#define ICE_INVAL_COUNTER_ID 0xff
+	u8 counter_index;
+};
+
+enum ice_promisc_flags {
+	ICE_PROMISC_UCAST_RX = 0x1,
+	ICE_PROMISC_UCAST_TX = 0x2,
+	ICE_PROMISC_MCAST_RX = 0x4,
+	ICE_PROMISC_MCAST_TX = 0x8,
+	ICE_PROMISC_BCAST_RX = 0x10,
+	ICE_PROMISC_BCAST_TX = 0x20,
+	ICE_PROMISC_VLAN_RX = 0x40,
+	ICE_PROMISC_VLAN_TX = 0x80,
+};
+
+/* VSI related commands */
+enum ice_status
+ice_add_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+	    struct ice_sq_cd *cd);
+enum ice_status
+ice_free_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+	     bool keep_vsi_alloc, struct ice_sq_cd *cd);
+enum ice_status
+ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+	       struct ice_sq_cd *cd);
+bool ice_is_vsi_valid(struct ice_hw *hw, u16 vsi_handle);
+struct ice_vsi_ctx *ice_get_vsi_ctx(struct ice_hw *hw, u16 vsi_handle);
+void ice_clear_all_vsi_ctx(struct ice_hw *hw);
+/* Switch config */
+enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw);
+
+enum ice_status
+ice_alloc_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
+		   u16 *counter_id);
+enum ice_status
+ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
+		  u16 counter_id);
+
+/* Switch/bridge related commands */
+enum ice_status ice_update_sw_rule_bridge_mode(struct ice_hw *hw);
+enum ice_status ice_add_mac(struct ice_hw *hw, struct list_head *m_lst);
+enum ice_status ice_remove_mac(struct ice_hw *hw, struct list_head *m_lst);
+enum ice_status
+ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list);
+enum ice_status
+ice_remove_eth_mac(struct ice_hw *hw, struct list_head *em_list);
+void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_handle);
+enum ice_status
+ice_add_vlan(struct ice_hw *hw, struct list_head *m_list);
+enum ice_status ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list);
+
+/* Promisc/defport setup for VSIs */
+enum ice_status
+ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_handle, bool set, u8 direction);
+enum ice_status
+ice_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+		    u16 vid);
+enum ice_status
+ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+		      u16 vid);
+enum ice_status
+ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+			 bool rm_vlan_promisc);
+
+enum ice_status ice_init_def_sw_recp(struct ice_hw *hw);
+u16 ice_get_hw_vsi_num(struct ice_hw *hw, u16 vsi_handle);
+bool ice_is_vsi_valid(struct ice_hw *hw, u16 vsi_handle);
+
+enum ice_status ice_replay_vsi_all_fltr(struct ice_hw *hw, u16 vsi_handle);
+void ice_rm_all_sw_replay_rule_info(struct ice_hw *hw);
+
+#endif /* _ICE_SWITCH_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
new file mode 100644
index 000000000..442a9bcbf
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -0,0 +1,2562 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* The driver transmit and receive code */
+
+#include <linux/prefetch.h>
+#include <linux/mm.h>
+#include <linux/bpf_trace.h>
+#include <net/xdp.h>
+#include "ice_txrx_lib.h"
+#include "ice_lib.h"
+#include "ice.h"
+#include "ice_dcb_lib.h"
+#include "ice_xsk.h"
+
+#define ICE_RX_HDR_SIZE		256
+
+#define FDIR_DESC_RXDID 0x40
+#define ICE_FDIR_CLEAN_DELAY 10
+
+/**
+ * ice_prgm_fdir_fltr - Program a Flow Director filter
+ * @vsi: VSI to send dummy packet
+ * @fdir_desc: flow director descriptor
+ * @raw_packet: allocated buffer for flow director
+ */
+int
+ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc,
+		   u8 *raw_packet)
+{
+	struct ice_tx_buf *tx_buf, *first;
+	struct ice_fltr_desc *f_desc;
+	struct ice_tx_desc *tx_desc;
+	struct ice_ring *tx_ring;
+	struct device *dev;
+	dma_addr_t dma;
+	u32 td_cmd;
+	u16 i;
+
+	/* VSI and Tx ring */
+	if (!vsi)
+		return -ENOENT;
+	tx_ring = vsi->tx_rings[0];
+	if (!tx_ring || !tx_ring->desc)
+		return -ENOENT;
+	dev = tx_ring->dev;
+
+	/* we are using two descriptors to add/del a filter and we can wait */
+	for (i = ICE_FDIR_CLEAN_DELAY; ICE_DESC_UNUSED(tx_ring) < 2; i--) {
+		if (!i)
+			return -EAGAIN;
+		msleep_interruptible(1);
+	}
+
+	dma = dma_map_single(dev, raw_packet, ICE_FDIR_MAX_RAW_PKT_SIZE,
+			     DMA_TO_DEVICE);
+
+	if (dma_mapping_error(dev, dma))
+		return -EINVAL;
+
+	/* grab the next descriptor */
+	i = tx_ring->next_to_use;
+	first = &tx_ring->tx_buf[i];
+	f_desc = ICE_TX_FDIRDESC(tx_ring, i);
+	memcpy(f_desc, fdir_desc, sizeof(*f_desc));
+
+	i++;
+	i = (i < tx_ring->count) ? i : 0;
+	tx_desc = ICE_TX_DESC(tx_ring, i);
+	tx_buf = &tx_ring->tx_buf[i];
+
+	i++;
+	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+	memset(tx_buf, 0, sizeof(*tx_buf));
+	dma_unmap_len_set(tx_buf, len, ICE_FDIR_MAX_RAW_PKT_SIZE);
+	dma_unmap_addr_set(tx_buf, dma, dma);
+
+	tx_desc->buf_addr = cpu_to_le64(dma);
+	td_cmd = ICE_TXD_LAST_DESC_CMD | ICE_TX_DESC_CMD_DUMMY |
+		 ICE_TX_DESC_CMD_RE;
+
+	tx_buf->tx_flags = ICE_TX_FLAGS_DUMMY_PKT;
+	tx_buf->raw_buf = raw_packet;
+
+	tx_desc->cmd_type_offset_bsz =
+		ice_build_ctob(td_cmd, 0, ICE_FDIR_MAX_RAW_PKT_SIZE, 0);
+
+	/* Force memory write to complete before letting h/w know
+	 * there are new descriptors to fetch.
+	 */
+	wmb();
+
+	/* mark the data descriptor to be watched */
+	first->next_to_watch = tx_desc;
+
+	writel(tx_ring->next_to_use, tx_ring->tail);
+
+	return 0;
+}
+
+/**
+ * ice_unmap_and_free_tx_buf - Release a Tx buffer
+ * @ring: the ring that owns the buffer
+ * @tx_buf: the buffer to free
+ */
+static void
+ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf)
+{
+	if (tx_buf->skb) {
+		if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT)
+			devm_kfree(ring->dev, tx_buf->raw_buf);
+		else if (ice_ring_is_xdp(ring))
+			page_frag_free(tx_buf->raw_buf);
+		else
+			dev_kfree_skb_any(tx_buf->skb);
+		if (dma_unmap_len(tx_buf, len))
+			dma_unmap_single(ring->dev,
+					 dma_unmap_addr(tx_buf, dma),
+					 dma_unmap_len(tx_buf, len),
+					 DMA_TO_DEVICE);
+	} else if (dma_unmap_len(tx_buf, len)) {
+		dma_unmap_page(ring->dev,
+			       dma_unmap_addr(tx_buf, dma),
+			       dma_unmap_len(tx_buf, len),
+			       DMA_TO_DEVICE);
+	}
+
+	tx_buf->next_to_watch = NULL;
+	tx_buf->skb = NULL;
+	dma_unmap_len_set(tx_buf, len, 0);
+	/* tx_buf must be completely set up in the transmit path */
+}
+
+static struct netdev_queue *txring_txq(const struct ice_ring *ring)
+{
+	return netdev_get_tx_queue(ring->netdev, ring->q_index);
+}
+
+/**
+ * ice_clean_tx_ring - Free any empty Tx buffers
+ * @tx_ring: ring to be cleaned
+ */
+void ice_clean_tx_ring(struct ice_ring *tx_ring)
+{
+	u16 i;
+
+	if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_pool) {
+		ice_xsk_clean_xdp_ring(tx_ring);
+		goto tx_skip_free;
+	}
+
+	/* ring already cleared, nothing to do */
+	if (!tx_ring->tx_buf)
+		return;
+
+	/* Free all the Tx ring sk_buffs */
+	for (i = 0; i < tx_ring->count; i++)
+		ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
+
+tx_skip_free:
+	memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count);
+
+	/* Zero out the descriptor ring */
+	memset(tx_ring->desc, 0, tx_ring->size);
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+
+	if (!tx_ring->netdev)
+		return;
+
+	/* cleanup Tx queue statistics */
+	netdev_tx_reset_queue(txring_txq(tx_ring));
+}
+
+/**
+ * ice_free_tx_ring - Free Tx resources per queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ */
+void ice_free_tx_ring(struct ice_ring *tx_ring)
+{
+	ice_clean_tx_ring(tx_ring);
+	devm_kfree(tx_ring->dev, tx_ring->tx_buf);
+	tx_ring->tx_buf = NULL;
+
+	if (tx_ring->desc) {
+		dmam_free_coherent(tx_ring->dev, tx_ring->size,
+				   tx_ring->desc, tx_ring->dma);
+		tx_ring->desc = NULL;
+	}
+}
+
+/**
+ * ice_clean_tx_irq - Reclaim resources after transmit completes
+ * @tx_ring: Tx ring to clean
+ * @napi_budget: Used to determine if we are in netpoll
+ *
+ * Returns true if there's any budget left (e.g. the clean is finished)
+ */
+static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
+{
+	unsigned int total_bytes = 0, total_pkts = 0;
+	unsigned int budget = ICE_DFLT_IRQ_WORK;
+	struct ice_vsi *vsi = tx_ring->vsi;
+	s16 i = tx_ring->next_to_clean;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+
+	tx_buf = &tx_ring->tx_buf[i];
+	tx_desc = ICE_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	prefetch(&vsi->state);
+
+	do {
+		struct ice_tx_desc *eop_desc = tx_buf->next_to_watch;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		smp_rmb();	/* prevent any other reads prior to eop_desc */
+
+		/* if the descriptor isn't done, no work yet to do */
+		if (!(eop_desc->cmd_type_offset_bsz &
+		      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buf->next_to_watch = NULL;
+
+		/* update the statistics for this packet */
+		total_bytes += tx_buf->bytecount;
+		total_pkts += tx_buf->gso_segs;
+
+		if (ice_ring_is_xdp(tx_ring))
+			page_frag_free(tx_buf->raw_buf);
+		else
+			/* free the skb */
+			napi_consume_skb(tx_buf->skb, napi_budget);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buf, dma),
+				 dma_unmap_len(tx_buf, len),
+				 DMA_TO_DEVICE);
+
+		/* clear tx_buf data */
+		tx_buf->skb = NULL;
+		dma_unmap_len_set(tx_buf, len, 0);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buf++;
+			tx_desc++;
+			i++;
+			if (unlikely(!i)) {
+				i -= tx_ring->count;
+				tx_buf = tx_ring->tx_buf;
+				tx_desc = ICE_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buf, len)) {
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buf, dma),
+					       dma_unmap_len(tx_buf, len),
+					       DMA_TO_DEVICE);
+				dma_unmap_len_set(tx_buf, len, 0);
+			}
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buf++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buf = tx_ring->tx_buf;
+			tx_desc = ICE_TX_DESC(tx_ring, 0);
+		}
+
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+
+	ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes);
+
+	if (ice_ring_is_xdp(tx_ring))
+		return !!budget;
+
+	netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts,
+				  total_bytes);
+
+#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
+	if (unlikely(total_pkts && netif_carrier_ok(tx_ring->netdev) &&
+		     (ICE_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+		if (__netif_subqueue_stopped(tx_ring->netdev,
+					     tx_ring->q_index) &&
+		    !test_bit(__ICE_DOWN, vsi->state)) {
+			netif_wake_subqueue(tx_ring->netdev,
+					    tx_ring->q_index);
+			++tx_ring->tx_stats.restart_q;
+		}
+	}
+
+	return !!budget;
+}
+
+/**
+ * ice_setup_tx_ring - Allocate the Tx descriptors
+ * @tx_ring: the Tx ring to set up
+ *
+ * Return 0 on success, negative on error
+ */
+int ice_setup_tx_ring(struct ice_ring *tx_ring)
+{
+	struct device *dev = tx_ring->dev;
+
+	if (!dev)
+		return -ENOMEM;
+
+	/* warn if we are about to overwrite the pointer */
+	WARN_ON(tx_ring->tx_buf);
+	tx_ring->tx_buf =
+		devm_kzalloc(dev, sizeof(*tx_ring->tx_buf) * tx_ring->count,
+			     GFP_KERNEL);
+	if (!tx_ring->tx_buf)
+		return -ENOMEM;
+
+	/* round up to nearest page */
+	tx_ring->size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc),
+			      PAGE_SIZE);
+	tx_ring->desc = dmam_alloc_coherent(dev, tx_ring->size, &tx_ring->dma,
+					    GFP_KERNEL);
+	if (!tx_ring->desc) {
+		dev_err(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
+			tx_ring->size);
+		goto err;
+	}
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+	tx_ring->tx_stats.prev_pkt = -1;
+	return 0;
+
+err:
+	devm_kfree(dev, tx_ring->tx_buf);
+	tx_ring->tx_buf = NULL;
+	return -ENOMEM;
+}
+
+/**
+ * ice_clean_rx_ring - Free Rx buffers
+ * @rx_ring: ring to be cleaned
+ */
+void ice_clean_rx_ring(struct ice_ring *rx_ring)
+{
+	struct device *dev = rx_ring->dev;
+	u16 i;
+
+	/* ring already cleared, nothing to do */
+	if (!rx_ring->rx_buf)
+		return;
+
+	if (rx_ring->xsk_pool) {
+		ice_xsk_clean_rx_ring(rx_ring);
+		goto rx_skip_free;
+	}
+
+	/* Free all the Rx ring sk_buffs */
+	for (i = 0; i < rx_ring->count; i++) {
+		struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
+
+		if (rx_buf->skb) {
+			dev_kfree_skb(rx_buf->skb);
+			rx_buf->skb = NULL;
+		}
+		if (!rx_buf->page)
+			continue;
+
+		/* Invalidate cache lines that may have been written to by
+		 * device so that we avoid corrupting memory.
+		 */
+		dma_sync_single_range_for_cpu(dev, rx_buf->dma,
+					      rx_buf->page_offset,
+					      rx_ring->rx_buf_len,
+					      DMA_FROM_DEVICE);
+
+		/* free resources associated with mapping */
+		dma_unmap_page_attrs(dev, rx_buf->dma, ice_rx_pg_size(rx_ring),
+				     DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
+		__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
+
+		rx_buf->page = NULL;
+		rx_buf->page_offset = 0;
+	}
+
+rx_skip_free:
+	memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count);
+
+	/* Zero out the descriptor ring */
+	memset(rx_ring->desc, 0, rx_ring->size);
+
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+}
+
+/**
+ * ice_free_rx_ring - Free Rx resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ */
+void ice_free_rx_ring(struct ice_ring *rx_ring)
+{
+	ice_clean_rx_ring(rx_ring);
+	if (rx_ring->vsi->type == ICE_VSI_PF)
+		if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+			xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+	rx_ring->xdp_prog = NULL;
+	devm_kfree(rx_ring->dev, rx_ring->rx_buf);
+	rx_ring->rx_buf = NULL;
+
+	if (rx_ring->desc) {
+		dmam_free_coherent(rx_ring->dev, rx_ring->size,
+				   rx_ring->desc, rx_ring->dma);
+		rx_ring->desc = NULL;
+	}
+}
+
+/**
+ * ice_setup_rx_ring - Allocate the Rx descriptors
+ * @rx_ring: the Rx ring to set up
+ *
+ * Return 0 on success, negative on error
+ */
+int ice_setup_rx_ring(struct ice_ring *rx_ring)
+{
+	struct device *dev = rx_ring->dev;
+
+	if (!dev)
+		return -ENOMEM;
+
+	/* warn if we are about to overwrite the pointer */
+	WARN_ON(rx_ring->rx_buf);
+	rx_ring->rx_buf =
+		devm_kzalloc(dev, sizeof(*rx_ring->rx_buf) * rx_ring->count,
+			     GFP_KERNEL);
+	if (!rx_ring->rx_buf)
+		return -ENOMEM;
+
+	/* round up to nearest page */
+	rx_ring->size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
+			      PAGE_SIZE);
+	rx_ring->desc = dmam_alloc_coherent(dev, rx_ring->size, &rx_ring->dma,
+					    GFP_KERNEL);
+	if (!rx_ring->desc) {
+		dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
+			rx_ring->size);
+		goto err;
+	}
+
+	rx_ring->next_to_use = 0;
+	rx_ring->next_to_clean = 0;
+
+	if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+		WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
+
+	if (rx_ring->vsi->type == ICE_VSI_PF &&
+	    !xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+		if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
+				     rx_ring->q_index))
+			goto err;
+	return 0;
+
+err:
+	devm_kfree(dev, rx_ring->rx_buf);
+	rx_ring->rx_buf = NULL;
+	return -ENOMEM;
+}
+
+/**
+ * ice_rx_offset - Return expected offset into page to access data
+ * @rx_ring: Ring we are requesting offset of
+ *
+ * Returns the offset value for ring into the data buffer.
+ */
+static unsigned int ice_rx_offset(struct ice_ring *rx_ring)
+{
+	if (ice_ring_uses_build_skb(rx_ring))
+		return ICE_SKB_PAD;
+	else if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+		return XDP_PACKET_HEADROOM;
+
+	return 0;
+}
+
+static unsigned int
+ice_rx_frame_truesize(struct ice_ring *rx_ring, unsigned int __maybe_unused size)
+{
+	unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+	truesize = ice_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
+#else
+	truesize = ice_rx_offset(rx_ring) ?
+		SKB_DATA_ALIGN(ice_rx_offset(rx_ring) + size) +
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
+		SKB_DATA_ALIGN(size);
+#endif
+	return truesize;
+}
+
+/**
+ * ice_run_xdp - Executes an XDP program on initialized xdp_buff
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ * @xdp_prog: XDP program to run
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static int
+ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
+	    struct bpf_prog *xdp_prog)
+{
+	struct ice_ring *xdp_ring;
+	int err, result;
+	u32 act;
+
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	switch (act) {
+	case XDP_PASS:
+		return ICE_XDP_PASS;
+	case XDP_TX:
+		xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()];
+		result = ice_xmit_xdp_buff(xdp, xdp_ring);
+		if (result == ICE_XDP_CONSUMED)
+			goto out_failure;
+		return result;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		if (err)
+			goto out_failure;
+		return ICE_XDP_REDIR;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		fallthrough;
+	case XDP_ABORTED:
+out_failure:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+		fallthrough;
+	case XDP_DROP:
+		return ICE_XDP_CONSUMED;
+	}
+}
+
+/**
+ * ice_xdp_xmit - submit packets to XDP ring for transmission
+ * @dev: netdev
+ * @n: number of XDP frames to be transmitted
+ * @frames: XDP frames to be transmitted
+ * @flags: transmit flags
+ *
+ * Returns number of frames successfully sent. Frames that fail are
+ * free'ed via XDP return API.
+ * For error cases, a negative errno code is returned and no-frames
+ * are transmitted (caller must handle freeing frames).
+ */
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+	     u32 flags)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	unsigned int queue_index = smp_processor_id();
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_ring *xdp_ring;
+	int drops = 0, i;
+
+	if (test_bit(__ICE_DOWN, vsi->state))
+		return -ENETDOWN;
+
+	if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq)
+		return -ENXIO;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	xdp_ring = vsi->xdp_rings[queue_index];
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		int err;
+
+		err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+		if (err != ICE_XDP_TX) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+		}
+	}
+
+	if (unlikely(flags & XDP_XMIT_FLUSH))
+		ice_xdp_ring_update_tail(xdp_ring);
+
+	return n - drops;
+}
+
+/**
+ * ice_alloc_mapped_page - recycle or make a new page
+ * @rx_ring: ring to use
+ * @bi: rx_buf struct to modify
+ *
+ * Returns true if the page was successfully allocated or
+ * reused.
+ */
+static bool
+ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi)
+{
+	struct page *page = bi->page;
+	dma_addr_t dma;
+
+	/* since we are recycling buffers we should seldom need to alloc */
+	if (likely(page))
+		return true;
+
+	/* alloc new page for storage */
+	page = dev_alloc_pages(ice_rx_pg_order(rx_ring));
+	if (unlikely(!page)) {
+		rx_ring->rx_stats.alloc_page_failed++;
+		return false;
+	}
+
+	/* map page for use */
+	dma = dma_map_page_attrs(rx_ring->dev, page, 0, ice_rx_pg_size(rx_ring),
+				 DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
+
+	/* if mapping failed free memory back to system since
+	 * there isn't much point in holding memory we can't use
+	 */
+	if (dma_mapping_error(rx_ring->dev, dma)) {
+		__free_pages(page, ice_rx_pg_order(rx_ring));
+		rx_ring->rx_stats.alloc_page_failed++;
+		return false;
+	}
+
+	bi->dma = dma;
+	bi->page = page;
+	bi->page_offset = ice_rx_offset(rx_ring);
+	page_ref_add(page, USHRT_MAX - 1);
+	bi->pagecnt_bias = USHRT_MAX;
+
+	return true;
+}
+
+/**
+ * ice_alloc_rx_bufs - Replace used receive buffers
+ * @rx_ring: ring to place buffers on
+ * @cleaned_count: number of buffers to replace
+ *
+ * Returns false if all allocations were successful, true if any fail. Returning
+ * true signals to the caller that we didn't replace cleaned_count buffers and
+ * there is more work to do.
+ *
+ * First, try to clean "cleaned_count" Rx buffers. Then refill the cleaned Rx
+ * buffers. Then bump tail at most one time. Grouping like this lets us avoid
+ * multiple tail writes per call.
+ */
+bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
+{
+	union ice_32b_rx_flex_desc *rx_desc;
+	u16 ntu = rx_ring->next_to_use;
+	struct ice_rx_buf *bi;
+
+	/* do nothing if no valid netdev defined */
+	if ((!rx_ring->netdev && rx_ring->vsi->type != ICE_VSI_CTRL) ||
+	    !cleaned_count)
+		return false;
+
+	/* get the Rx descriptor and buffer based on next_to_use */
+	rx_desc = ICE_RX_DESC(rx_ring, ntu);
+	bi = &rx_ring->rx_buf[ntu];
+
+	do {
+		/* if we fail here, we have work remaining */
+		if (!ice_alloc_mapped_page(rx_ring, bi))
+			break;
+
+		/* sync the buffer for use by the device */
+		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+						 bi->page_offset,
+						 rx_ring->rx_buf_len,
+						 DMA_FROM_DEVICE);
+
+		/* Refresh the desc even if buffer_addrs didn't change
+		 * because each write-back erases this info.
+		 */
+		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+
+		rx_desc++;
+		bi++;
+		ntu++;
+		if (unlikely(ntu == rx_ring->count)) {
+			rx_desc = ICE_RX_DESC(rx_ring, 0);
+			bi = rx_ring->rx_buf;
+			ntu = 0;
+		}
+
+		/* clear the status bits for the next_to_use descriptor */
+		rx_desc->wb.status_error0 = 0;
+
+		cleaned_count--;
+	} while (cleaned_count);
+
+	if (rx_ring->next_to_use != ntu)
+		ice_release_rx_desc(rx_ring, ntu);
+
+	return !!cleaned_count;
+}
+
+/**
+ * ice_page_is_reserved - check if reuse is possible
+ * @page: page struct to check
+ */
+static bool ice_page_is_reserved(struct page *page)
+{
+	return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
+}
+
+/**
+ * ice_rx_buf_adjust_pg_offset - Prepare Rx buffer for reuse
+ * @rx_buf: Rx buffer to adjust
+ * @size: Size of adjustment
+ *
+ * Update the offset within page so that Rx buf will be ready to be reused.
+ * For systems with PAGE_SIZE < 8192 this function will flip the page offset
+ * so the second half of page assigned to Rx buffer will be used, otherwise
+ * the offset is moved by "size" bytes
+ */
+static void
+ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
+{
+#if (PAGE_SIZE < 8192)
+	/* flip page offset to other buffer */
+	rx_buf->page_offset ^= size;
+#else
+	/* move offset up to the next cache line */
+	rx_buf->page_offset += size;
+#endif
+}
+
+/**
+ * ice_can_reuse_rx_page - Determine if page can be reused for another Rx
+ * @rx_buf: buffer containing the page
+ * @rx_buf_pgcnt: rx_buf page refcount pre xdp_do_redirect() call
+ *
+ * If page is reusable, we have a green light for calling ice_reuse_rx_page,
+ * which will assign the current buffer to the buffer that next_to_alloc is
+ * pointing to; otherwise, the DMA mapping needs to be destroyed and
+ * page freed
+ */
+static bool
+ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt)
+{
+	unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
+	struct page *page = rx_buf->page;
+
+	/* avoid re-using remote pages */
+	if (unlikely(ice_page_is_reserved(page)))
+		return false;
+
+#if (PAGE_SIZE < 8192)
+	/* if we are only owner of page we can reuse it */
+	if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1))
+		return false;
+#else
+#define ICE_LAST_OFFSET \
+	(SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048)
+	if (rx_buf->page_offset > ICE_LAST_OFFSET)
+		return false;
+#endif /* PAGE_SIZE < 8192) */
+
+	/* If we have drained the page fragment pool we need to update
+	 * the pagecnt_bias and page count so that we fully restock the
+	 * number of references the driver holds.
+	 */
+	if (unlikely(pagecnt_bias == 1)) {
+		page_ref_add(page, USHRT_MAX - 1);
+		rx_buf->pagecnt_bias = USHRT_MAX;
+	}
+
+	return true;
+}
+
+/**
+ * ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buf: buffer containing page to add
+ * @skb: sk_buff to place the data into
+ * @size: packet length from rx_desc
+ *
+ * This function will add the data contained in rx_buf->page to the skb.
+ * It will just attach the page as a frag to the skb.
+ * The function will then update the page offset.
+ */
+static void
+ice_add_rx_frag(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+		struct sk_buff *skb, unsigned int size)
+{
+#if (PAGE_SIZE >= 8192)
+	unsigned int truesize = SKB_DATA_ALIGN(size + ice_rx_offset(rx_ring));
+#else
+	unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
+#endif
+
+	if (!size)
+		return;
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page,
+			rx_buf->page_offset, size, truesize);
+
+	/* page is being used so we must update the page offset */
+	ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+}
+
+/**
+ * ice_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: Rx descriptor ring to store buffers on
+ * @old_buf: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ */
+static void
+ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf)
+{
+	u16 nta = rx_ring->next_to_alloc;
+	struct ice_rx_buf *new_buf;
+
+	new_buf = &rx_ring->rx_buf[nta];
+
+	/* update, and store next to alloc */
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	/* Transfer page from old buffer to new buffer.
+	 * Move each member individually to avoid possible store
+	 * forwarding stalls and unnecessary copy of skb.
+	 */
+	new_buf->dma = old_buf->dma;
+	new_buf->page = old_buf->page;
+	new_buf->page_offset = old_buf->page_offset;
+	new_buf->pagecnt_bias = old_buf->pagecnt_bias;
+}
+
+/**
+ * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @skb: skb to be used
+ * @size: size of buffer to add to skb
+ * @rx_buf_pgcnt: rx_buf page refcount
+ *
+ * This function will pull an Rx buffer from the ring and synchronize it
+ * for use by the CPU.
+ */
+static struct ice_rx_buf *
+ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
+	       const unsigned int size, int *rx_buf_pgcnt)
+{
+	struct ice_rx_buf *rx_buf;
+
+	rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
+	*rx_buf_pgcnt =
+#if (PAGE_SIZE < 8192)
+		page_count(rx_buf->page);
+#else
+		0;
+#endif
+	prefetchw(rx_buf->page);
+	*skb = rx_buf->skb;
+
+	if (!size)
+		return rx_buf;
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma,
+				      rx_buf->page_offset, size,
+				      DMA_FROM_DEVICE);
+
+	/* We have pulled a buffer for use, so decrement pagecnt_bias */
+	rx_buf->pagecnt_bias--;
+
+	return rx_buf;
+}
+
+/**
+ * ice_build_skb - Build skb around an existing buffer
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buf: Rx buffer to pull data from
+ * @xdp: xdp_buff pointing to the data
+ *
+ * This function builds an skb around an existing Rx buffer, taking care
+ * to set up the skb correctly and avoid any memcpy overhead.
+ */
+static struct sk_buff *
+ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+	      struct xdp_buff *xdp)
+{
+	u8 metasize = xdp->data - xdp->data_meta;
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+				SKB_DATA_ALIGN(xdp->data_end -
+					       xdp->data_hard_start);
+#endif
+	struct sk_buff *skb;
+
+	/* Prefetch first cache line of first page. If xdp->data_meta
+	 * is unused, this points exactly as xdp->data, otherwise we
+	 * likely have a consumer accessing first few bytes of meta
+	 * data, and then actual data.
+	 */
+	net_prefetch(xdp->data_meta);
+	/* build an skb around the page buffer */
+	skb = build_skb(xdp->data_hard_start, truesize);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* must to record Rx queue, otherwise OS features such as
+	 * symmetric queue won't work
+	 */
+	skb_record_rx_queue(skb, rx_ring->q_index);
+
+	/* update pointers within the skb to store the data */
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	__skb_put(skb, xdp->data_end - xdp->data);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
+	/* buffer is used by skb, update page_offset */
+	ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+
+	return skb;
+}
+
+/**
+ * ice_construct_skb - Allocate skb and populate it
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buf: Rx buffer to pull data from
+ * @xdp: xdp_buff pointing to the data
+ *
+ * This function allocates an skb. It then populates it with the page
+ * data from the current receive descriptor, taking care to set up the
+ * skb correctly.
+ */
+static struct sk_buff *
+ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+		  struct xdp_buff *xdp)
+{
+	unsigned int size = xdp->data_end - xdp->data;
+	unsigned int headlen;
+	struct sk_buff *skb;
+
+	/* prefetch first cache line of first page */
+	net_prefetch(xdp->data);
+
+	/* allocate a skb to store the frags */
+	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
+
+	skb_record_rx_queue(skb, rx_ring->q_index);
+	/* Determine available headroom for copy */
+	headlen = size;
+	if (headlen > ICE_RX_HDR_SIZE)
+		headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen,
+							 sizeof(long)));
+
+	/* if we exhaust the linear part then add what is left as a frag */
+	size -= headlen;
+	if (size) {
+#if (PAGE_SIZE >= 8192)
+		unsigned int truesize = SKB_DATA_ALIGN(size);
+#else
+		unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
+#endif
+		skb_add_rx_frag(skb, 0, rx_buf->page,
+				rx_buf->page_offset + headlen, size, truesize);
+		/* buffer is used by skb, update page_offset */
+		ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+	} else {
+		/* buffer is unused, reset bias back to rx_buf; data was copied
+		 * onto skb's linear part so there's no need for adjusting
+		 * page offset and we can reuse this buffer as-is
+		 */
+		rx_buf->pagecnt_bias++;
+	}
+
+	return skb;
+}
+
+/**
+ * ice_put_rx_buf - Clean up used buffer and either recycle or free
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buf: Rx buffer to pull data from
+ * @rx_buf_pgcnt: Rx buffer page count pre xdp_do_redirect()
+ *
+ * This function will update next_to_clean and then clean up the contents
+ * of the rx_buf. It will either recycle the buffer or unmap it and free
+ * the associated resources.
+ */
+static void
+ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+	       int rx_buf_pgcnt)
+{
+	u16 ntc = rx_ring->next_to_clean + 1;
+
+	/* fetch, update, and store next to clean */
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+
+	if (!rx_buf)
+		return;
+
+	if (ice_can_reuse_rx_page(rx_buf, rx_buf_pgcnt)) {
+		/* hand second half of page back to the ring */
+		ice_reuse_rx_page(rx_ring, rx_buf);
+	} else {
+		/* we are not reusing the buffer so unmap it */
+		dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma,
+				     ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
+				     ICE_RX_DMA_ATTR);
+		__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
+	}
+
+	/* clear contents of buffer_info */
+	rx_buf->page = NULL;
+	rx_buf->skb = NULL;
+}
+
+/**
+ * ice_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ * @skb: Current socket buffer containing buffer in progress
+ *
+ * If the buffer is an EOP buffer, this function exits returning false,
+ * otherwise return true indicating that this is in fact a non-EOP buffer.
+ */
+static bool
+ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
+	       struct sk_buff *skb)
+{
+	/* if we are the last buffer then there is nothing else to do */
+#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
+	if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF)))
+		return false;
+
+	/* place skb in next buffer to be received */
+	rx_ring->rx_buf[rx_ring->next_to_clean].skb = skb;
+	rx_ring->rx_stats.non_eop_descs++;
+
+	return true;
+}
+
+/**
+ * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @budget: Total limit on number of packets to process
+ *
+ * This function provides a "bounce buffer" approach to Rx interrupt
+ * processing. The advantage to this is that on systems that have
+ * expensive overhead for IOMMU access this provides a means of avoiding
+ * it by maintaining the mapping of the page to the system.
+ *
+ * Returns amount of work completed
+ */
+int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
+{
+	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
+	u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+	unsigned int xdp_res, xdp_xmit = 0;
+	struct bpf_prog *xdp_prog = NULL;
+	struct xdp_buff xdp;
+	bool failure;
+
+	xdp.rxq = &rx_ring->xdp_rxq;
+	/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
+#if (PAGE_SIZE < 8192)
+	xdp.frame_sz = ice_rx_frame_truesize(rx_ring, 0);
+#endif
+
+	/* start the loop to process Rx packets bounded by 'budget' */
+	while (likely(total_rx_pkts < (unsigned int)budget)) {
+		union ice_32b_rx_flex_desc *rx_desc;
+		struct ice_rx_buf *rx_buf;
+		struct sk_buff *skb;
+		unsigned int size;
+		u16 stat_err_bits;
+		int rx_buf_pgcnt;
+		u16 vlan_tag = 0;
+		u8 rx_ptype;
+
+		/* get the Rx desc from Rx ring based on 'next_to_clean' */
+		rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+		/* status_error_len will always be zero for unused descriptors
+		 * because it's cleared in cleanup, and overlaps with hdr_addr
+		 * which is always zero because packet split isn't used, if the
+		 * hardware wrote DD then it will be non-zero
+		 */
+		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
+		if (!ice_test_staterr(rx_desc, stat_err_bits))
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * DD bit is set.
+		 */
+		dma_rmb();
+
+		if (rx_desc->wb.rxdid == FDIR_DESC_RXDID || !rx_ring->netdev) {
+			ice_put_rx_buf(rx_ring, NULL, 0);
+			cleaned_count++;
+			continue;
+		}
+
+		size = le16_to_cpu(rx_desc->wb.pkt_len) &
+			ICE_RX_FLX_DESC_PKT_LEN_M;
+
+		/* retrieve a buffer from the ring */
+		rx_buf = ice_get_rx_buf(rx_ring, &skb, size, &rx_buf_pgcnt);
+
+		if (!size) {
+			xdp.data = NULL;
+			xdp.data_end = NULL;
+			xdp.data_hard_start = NULL;
+			xdp.data_meta = NULL;
+			goto construct_skb;
+		}
+
+		xdp.data = page_address(rx_buf->page) + rx_buf->page_offset;
+		xdp.data_hard_start = xdp.data - ice_rx_offset(rx_ring);
+		xdp.data_meta = xdp.data;
+		xdp.data_end = xdp.data + size;
+#if (PAGE_SIZE > 4096)
+		/* At larger PAGE_SIZE, frame_sz depend on len size */
+		xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size);
+#endif
+
+		rcu_read_lock();
+		xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+		if (!xdp_prog) {
+			rcu_read_unlock();
+			goto construct_skb;
+		}
+
+		xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog);
+		rcu_read_unlock();
+		if (!xdp_res)
+			goto construct_skb;
+		if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+			xdp_xmit |= xdp_res;
+			ice_rx_buf_adjust_pg_offset(rx_buf, xdp.frame_sz);
+		} else {
+			rx_buf->pagecnt_bias++;
+		}
+		total_rx_bytes += size;
+		total_rx_pkts++;
+
+		cleaned_count++;
+		ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt);
+		continue;
+construct_skb:
+		if (skb) {
+			ice_add_rx_frag(rx_ring, rx_buf, skb, size);
+		} else if (likely(xdp.data)) {
+			if (ice_ring_uses_build_skb(rx_ring))
+				skb = ice_build_skb(rx_ring, rx_buf, &xdp);
+			else
+				skb = ice_construct_skb(rx_ring, rx_buf, &xdp);
+		}
+		/* exit if we failed to retrieve a buffer */
+		if (!skb) {
+			rx_ring->rx_stats.alloc_buf_failed++;
+			if (rx_buf)
+				rx_buf->pagecnt_bias++;
+			break;
+		}
+
+		ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt);
+		cleaned_count++;
+
+		/* skip if it is NOP desc */
+		if (ice_is_non_eop(rx_ring, rx_desc, skb))
+			continue;
+
+		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
+		if (unlikely(ice_test_staterr(rx_desc, stat_err_bits))) {
+			dev_kfree_skb_any(skb);
+			continue;
+		}
+
+		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
+		if (ice_test_staterr(rx_desc, stat_err_bits))
+			vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
+
+		/* pad the skb if needed, to make a valid ethernet frame */
+		if (eth_skb_pad(skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		/* probably a little skewed due to removing CRC */
+		total_rx_bytes += skb->len;
+
+		/* populate checksum, VLAN, and protocol */
+		rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+			ICE_RX_FLEX_DESC_PTYPE_M;
+
+		ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+
+		/* send completed skb up the stack */
+		ice_receive_skb(rx_ring, skb, vlan_tag);
+
+		/* update budget accounting */
+		total_rx_pkts++;
+	}
+
+	/* return up to cleaned_count buffers to hardware */
+	failure = ice_alloc_rx_bufs(rx_ring, cleaned_count);
+
+	if (xdp_prog)
+		ice_finalize_xdp_rx(rx_ring, xdp_xmit);
+
+	ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes);
+
+	/* guarantee a trip back through this routine if there was a failure */
+	return failure ? budget : (int)total_rx_pkts;
+}
+
+/**
+ * ice_adjust_itr_by_size_and_speed - Adjust ITR based on current traffic
+ * @port_info: port_info structure containing the current link speed
+ * @avg_pkt_size: average size of Tx or Rx packets based on clean routine
+ * @itr: ITR value to update
+ *
+ * Calculate how big of an increment should be applied to the ITR value passed
+ * in based on wmem_default, SKB overhead, ethernet overhead, and the current
+ * link speed.
+ *
+ * The following is a calculation derived from:
+ *  wmem_default / (size + overhead) = desired_pkts_per_int
+ *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+ *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+ *
+ * Assuming wmem_default is 212992 and overhead is 640 bytes per
+ * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+ * formula down to:
+ *
+ *	 wmem_default * bits_per_byte * usecs_per_sec   pkt_size + 24
+ * ITR = -------------------------------------------- * --------------
+ *			     rate			pkt_size + 640
+ */
+static unsigned int
+ice_adjust_itr_by_size_and_speed(struct ice_port_info *port_info,
+				 unsigned int avg_pkt_size,
+				 unsigned int itr)
+{
+	switch (port_info->phy.link_info.link_speed) {
+	case ICE_AQ_LINK_SPEED_100GB:
+		itr += DIV_ROUND_UP(17 * (avg_pkt_size + 24),
+				    avg_pkt_size + 640);
+		break;
+	case ICE_AQ_LINK_SPEED_50GB:
+		itr += DIV_ROUND_UP(34 * (avg_pkt_size + 24),
+				    avg_pkt_size + 640);
+		break;
+	case ICE_AQ_LINK_SPEED_40GB:
+		itr += DIV_ROUND_UP(43 * (avg_pkt_size + 24),
+				    avg_pkt_size + 640);
+		break;
+	case ICE_AQ_LINK_SPEED_25GB:
+		itr += DIV_ROUND_UP(68 * (avg_pkt_size + 24),
+				    avg_pkt_size + 640);
+		break;
+	case ICE_AQ_LINK_SPEED_20GB:
+		itr += DIV_ROUND_UP(85 * (avg_pkt_size + 24),
+				    avg_pkt_size + 640);
+		break;
+	case ICE_AQ_LINK_SPEED_10GB:
+	default:
+		itr += DIV_ROUND_UP(170 * (avg_pkt_size + 24),
+				    avg_pkt_size + 640);
+		break;
+	}
+
+	if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) {
+		itr &= ICE_ITR_ADAPTIVE_LATENCY;
+		itr += ICE_ITR_ADAPTIVE_MAX_USECS;
+	}
+
+	return itr;
+}
+
+/**
+ * ice_update_itr - update the adaptive ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
+ * @rc: structure containing ring performance data
+ *
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt.  The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern.  Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
+ * while increasing bulk throughput.
+ */
+static void
+ice_update_itr(struct ice_q_vector *q_vector, struct ice_ring_container *rc)
+{
+	unsigned long next_update = jiffies;
+	unsigned int packets, bytes, itr;
+	bool container_is_rx;
+
+	if (!rc->ring || !ITR_IS_DYNAMIC(rc->itr_setting))
+		return;
+
+	/* If itr_countdown is set it means we programmed an ITR within
+	 * the last 4 interrupt cycles. This has a side effect of us
+	 * potentially firing an early interrupt. In order to work around
+	 * this we need to throw out any data received for a few
+	 * interrupts following the update.
+	 */
+	if (q_vector->itr_countdown) {
+		itr = rc->target_itr;
+		goto clear_counts;
+	}
+
+	container_is_rx = (&q_vector->rx == rc);
+	/* For Rx we want to push the delay up and default to low latency.
+	 * for Tx we want to pull the delay down and default to high latency.
+	 */
+	itr = container_is_rx ?
+		ICE_ITR_ADAPTIVE_MIN_USECS | ICE_ITR_ADAPTIVE_LATENCY :
+		ICE_ITR_ADAPTIVE_MAX_USECS | ICE_ITR_ADAPTIVE_LATENCY;
+
+	/* If we didn't update within up to 1 - 2 jiffies we can assume
+	 * that either packets are coming in so slow there hasn't been
+	 * any work, or that there is so much work that NAPI is dealing
+	 * with interrupt moderation and we don't need to do anything.
+	 */
+	if (time_after(next_update, rc->next_update))
+		goto clear_counts;
+
+	prefetch(q_vector->vsi->port_info);
+
+	packets = rc->total_pkts;
+	bytes = rc->total_bytes;
+
+	if (container_is_rx) {
+		/* If Rx there are 1 to 4 packets and bytes are less than
+		 * 9000 assume insufficient data to use bulk rate limiting
+		 * approach unless Tx is already in bulk rate limiting. We
+		 * are likely latency driven.
+		 */
+		if (packets && packets < 4 && bytes < 9000 &&
+		    (q_vector->tx.target_itr & ICE_ITR_ADAPTIVE_LATENCY)) {
+			itr = ICE_ITR_ADAPTIVE_LATENCY;
+			goto adjust_by_size_and_speed;
+		}
+	} else if (packets < 4) {
+		/* If we have Tx and Rx ITR maxed and Tx ITR is running in
+		 * bulk mode and we are receiving 4 or fewer packets just
+		 * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
+		 * that the Rx can relax.
+		 */
+		if (rc->target_itr == ICE_ITR_ADAPTIVE_MAX_USECS &&
+		    (q_vector->rx.target_itr & ICE_ITR_MASK) ==
+		    ICE_ITR_ADAPTIVE_MAX_USECS)
+			goto clear_counts;
+	} else if (packets > 32) {
+		/* If we have processed over 32 packets in a single interrupt
+		 * for Tx assume we need to switch over to "bulk" mode.
+		 */
+		rc->target_itr &= ~ICE_ITR_ADAPTIVE_LATENCY;
+	}
+
+	/* We have no packets to actually measure against. This means
+	 * either one of the other queues on this vector is active or
+	 * we are a Tx queue doing TSO with too high of an interrupt rate.
+	 *
+	 * Between 4 and 56 we can assume that our current interrupt delay
+	 * is only slightly too low. As such we should increase it by a small
+	 * fixed amount.
+	 */
+	if (packets < 56) {
+		itr = rc->target_itr + ICE_ITR_ADAPTIVE_MIN_INC;
+		if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) {
+			itr &= ICE_ITR_ADAPTIVE_LATENCY;
+			itr += ICE_ITR_ADAPTIVE_MAX_USECS;
+		}
+		goto clear_counts;
+	}
+
+	if (packets <= 256) {
+		itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+		itr &= ICE_ITR_MASK;
+
+		/* Between 56 and 112 is our "goldilocks" zone where we are
+		 * working out "just right". Just report that our current
+		 * ITR is good for us.
+		 */
+		if (packets <= 112)
+			goto clear_counts;
+
+		/* If packet count is 128 or greater we are likely looking
+		 * at a slight overrun of the delay we want. Try halving
+		 * our delay to see if that will cut the number of packets
+		 * in half per interrupt.
+		 */
+		itr >>= 1;
+		itr &= ICE_ITR_MASK;
+		if (itr < ICE_ITR_ADAPTIVE_MIN_USECS)
+			itr = ICE_ITR_ADAPTIVE_MIN_USECS;
+
+		goto clear_counts;
+	}
+
+	/* The paths below assume we are dealing with a bulk ITR since
+	 * number of packets is greater than 256. We are just going to have
+	 * to compute a value and try to bring the count under control,
+	 * though for smaller packet sizes there isn't much we can do as
+	 * NAPI polling will likely be kicking in sooner rather than later.
+	 */
+	itr = ICE_ITR_ADAPTIVE_BULK;
+
+adjust_by_size_and_speed:
+
+	/* based on checks above packets cannot be 0 so division is safe */
+	itr = ice_adjust_itr_by_size_and_speed(q_vector->vsi->port_info,
+					       bytes / packets, itr);
+
+clear_counts:
+	/* write back value */
+	rc->target_itr = itr;
+
+	/* next update should occur within next jiffy */
+	rc->next_update = next_update + 1;
+
+	rc->total_bytes = 0;
+	rc->total_pkts = 0;
+}
+
+/**
+ * ice_buildreg_itr - build value for writing to the GLINT_DYN_CTL register
+ * @itr_idx: interrupt throttling index
+ * @itr: interrupt throttling value in usecs
+ */
+static u32 ice_buildreg_itr(u16 itr_idx, u16 itr)
+{
+	/* The ITR value is reported in microseconds, and the register value is
+	 * recorded in 2 microsecond units. For this reason we only need to
+	 * shift by the GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S to apply this
+	 * granularity as a shift instead of division. The mask makes sure the
+	 * ITR value is never odd so we don't accidentally write into the field
+	 * prior to the ITR field.
+	 */
+	itr &= ICE_ITR_MASK;
+
+	return GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
+		(itr_idx << GLINT_DYN_CTL_ITR_INDX_S) |
+		(itr << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S));
+}
+
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either Tx or Rx are
+ * updated we make the adaptive scheme wait until either the ITR completely
+ * expires via the next_update expiration or we have been through at least
+ * 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
+
+/**
+ * ice_update_ena_itr - Update ITR and re-enable MSIX interrupt
+ * @q_vector: q_vector for which ITR is being updated and interrupt enabled
+ */
+static void ice_update_ena_itr(struct ice_q_vector *q_vector)
+{
+	struct ice_ring_container *tx = &q_vector->tx;
+	struct ice_ring_container *rx = &q_vector->rx;
+	struct ice_vsi *vsi = q_vector->vsi;
+	u32 itr_val;
+
+	/* when exiting WB_ON_ITR lets set a low ITR value and trigger
+	 * interrupts to expire right away in case we have more work ready to go
+	 * already
+	 */
+	if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) {
+		itr_val = ice_buildreg_itr(rx->itr_idx, ICE_WB_ON_ITR_USECS);
+		wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
+		/* set target back to last user set value */
+		rx->target_itr = rx->itr_setting;
+		/* set current to what we just wrote and dynamic if needed */
+		rx->current_itr = ICE_WB_ON_ITR_USECS |
+			(rx->itr_setting & ICE_ITR_DYNAMIC);
+		/* allow normal interrupt flow to start */
+		q_vector->itr_countdown = 0;
+		return;
+	}
+
+	/* This will do nothing if dynamic updates are not enabled */
+	ice_update_itr(q_vector, tx);
+	ice_update_itr(q_vector, rx);
+
+	/* This block of logic allows us to get away with only updating
+	 * one ITR value with each interrupt. The idea is to perform a
+	 * pseudo-lazy update with the following criteria.
+	 *
+	 * 1. Rx is given higher priority than Tx if both are in same state
+	 * 2. If we must reduce an ITR that is given highest priority.
+	 * 3. We then give priority to increasing ITR based on amount.
+	 */
+	if (rx->target_itr < rx->current_itr) {
+		/* Rx ITR needs to be reduced, this is highest priority */
+		itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr);
+		rx->current_itr = rx->target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if ((tx->target_itr < tx->current_itr) ||
+		   ((rx->target_itr - rx->current_itr) <
+		    (tx->target_itr - tx->current_itr))) {
+		/* Tx ITR needs to be reduced, this is second priority
+		 * Tx ITR needs to be increased more than Rx, fourth priority
+		 */
+		itr_val = ice_buildreg_itr(tx->itr_idx, tx->target_itr);
+		tx->current_itr = tx->target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if (rx->current_itr != rx->target_itr) {
+		/* Rx ITR needs to be increased, third priority */
+		itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr);
+		rx->current_itr = rx->target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else {
+		/* Still have to re-enable the interrupts */
+		itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
+		if (q_vector->itr_countdown)
+			q_vector->itr_countdown--;
+	}
+
+	if (!test_bit(__ICE_DOWN, q_vector->vsi->state))
+		wr32(&q_vector->vsi->back->hw,
+		     GLINT_DYN_CTL(q_vector->reg_idx),
+		     itr_val);
+}
+
+/**
+ * ice_set_wb_on_itr - set WB_ON_ITR for this q_vector
+ * @q_vector: q_vector to set WB_ON_ITR on
+ *
+ * We need to tell hardware to write-back completed descriptors even when
+ * interrupts are disabled. Descriptors will be written back on cache line
+ * boundaries without WB_ON_ITR enabled, but if we don't enable WB_ON_ITR
+ * descriptors may not be written back if they don't fill a cache line until the
+ * next interrupt.
+ *
+ * This sets the write-back frequency to 2 microseconds as that is the minimum
+ * value that's not 0 due to ITR granularity. Also, set the INTENA_MSK bit to
+ * make sure hardware knows we aren't meddling with the INTENA_M bit.
+ */
+static void ice_set_wb_on_itr(struct ice_q_vector *q_vector)
+{
+	struct ice_vsi *vsi = q_vector->vsi;
+
+	/* already in WB_ON_ITR mode no need to change it */
+	if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE)
+		return;
+
+	if (q_vector->num_ring_rx)
+		wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
+		     ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS,
+						 ICE_RX_ITR));
+
+	if (q_vector->num_ring_tx)
+		wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
+		     ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS,
+						 ICE_TX_ITR));
+
+	q_vector->itr_countdown = ICE_IN_WB_ON_ITR_MODE;
+}
+
+/**
+ * ice_napi_poll - NAPI polling Rx/Tx cleanup routine
+ * @napi: napi struct with our devices info in it
+ * @budget: amount of work driver is allowed to do this pass, in packets
+ *
+ * This function will clean all queues associated with a q_vector.
+ *
+ * Returns the amount of work done
+ */
+int ice_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct ice_q_vector *q_vector =
+				container_of(napi, struct ice_q_vector, napi);
+	bool clean_complete = true;
+	struct ice_ring *ring;
+	int budget_per_ring;
+	int work_done = 0;
+
+	/* Since the actual Tx work is minimal, we can give the Tx a larger
+	 * budget and be more aggressive about cleaning up the Tx descriptors.
+	 */
+	ice_for_each_ring(ring, q_vector->tx) {
+		bool wd = ring->xsk_pool ?
+			  ice_clean_tx_irq_zc(ring, budget) :
+			  ice_clean_tx_irq(ring, budget);
+
+		if (!wd)
+			clean_complete = false;
+	}
+
+	/* Handle case where we are called by netpoll with a budget of 0 */
+	if (unlikely(budget <= 0))
+		return budget;
+
+	/* normally we have 1 Rx ring per q_vector */
+	if (unlikely(q_vector->num_ring_rx > 1))
+		/* We attempt to distribute budget to each Rx queue fairly, but
+		 * don't allow the budget to go below 1 because that would exit
+		 * polling early.
+		 */
+		budget_per_ring = max_t(int, budget / q_vector->num_ring_rx, 1);
+	else
+		/* Max of 1 Rx ring in this q_vector so give it the budget */
+		budget_per_ring = budget;
+
+	ice_for_each_ring(ring, q_vector->rx) {
+		int cleaned;
+
+		/* A dedicated path for zero-copy allows making a single
+		 * comparison in the irq context instead of many inside the
+		 * ice_clean_rx_irq function and makes the codebase cleaner.
+		 */
+		cleaned = ring->xsk_pool ?
+			  ice_clean_rx_irq_zc(ring, budget_per_ring) :
+			  ice_clean_rx_irq(ring, budget_per_ring);
+		work_done += cleaned;
+		/* if we clean as many as budgeted, we must not be done */
+		if (cleaned >= budget_per_ring)
+			clean_complete = false;
+	}
+
+	/* If work not completed, return budget and polling will return */
+	if (!clean_complete)
+		return budget;
+
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done)))
+		ice_update_ena_itr(q_vector);
+	else
+		ice_set_wb_on_itr(q_vector);
+
+	return min_t(int, work_done, budget - 1);
+}
+
+/**
+ * __ice_maybe_stop_tx - 2nd level check for Tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size: the size buffer we want to assure is available
+ *
+ * Returns -EBUSY if a stop is needed, else 0
+ */
+static int __ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size)
+{
+	netif_stop_subqueue(tx_ring->netdev, tx_ring->q_index);
+	/* Memory barrier before checking head and tail */
+	smp_mb();
+
+	/* Check again in a case another CPU has just made room available. */
+	if (likely(ICE_DESC_UNUSED(tx_ring) < size))
+		return -EBUSY;
+
+	/* A reprieve! - use start_subqueue because it doesn't call schedule */
+	netif_start_subqueue(tx_ring->netdev, tx_ring->q_index);
+	++tx_ring->tx_stats.restart_q;
+	return 0;
+}
+
+/**
+ * ice_maybe_stop_tx - 1st level check for Tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size:    the size buffer we want to assure is available
+ *
+ * Returns 0 if stop is not needed
+ */
+static int ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size)
+{
+	if (likely(ICE_DESC_UNUSED(tx_ring) >= size))
+		return 0;
+
+	return __ice_maybe_stop_tx(tx_ring, size);
+}
+
+/**
+ * ice_tx_map - Build the Tx descriptor
+ * @tx_ring: ring to send buffer on
+ * @first: first buffer info buffer to use
+ * @off: pointer to struct that holds offload parameters
+ *
+ * This function loops over the skb data pointed to by *first
+ * and gets a physical address for each memory location and programs
+ * it and the length into the transmit descriptor.
+ */
+static void
+ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
+	   struct ice_tx_offload_params *off)
+{
+	u64 td_offset, td_tag, td_cmd;
+	u16 i = tx_ring->next_to_use;
+	unsigned int data_len, size;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+	struct sk_buff *skb;
+	skb_frag_t *frag;
+	dma_addr_t dma;
+
+	td_tag = off->td_l2tag1;
+	td_cmd = off->td_cmd;
+	td_offset = off->td_offset;
+	skb = first->skb;
+
+	data_len = skb->data_len;
+	size = skb_headlen(skb);
+
+	tx_desc = ICE_TX_DESC(tx_ring, i);
+
+	if (first->tx_flags & ICE_TX_FLAGS_HW_VLAN) {
+		td_cmd |= (u64)ICE_TX_DESC_CMD_IL2TAG1;
+		td_tag = (first->tx_flags & ICE_TX_FLAGS_VLAN_M) >>
+			  ICE_TX_FLAGS_VLAN_S;
+	}
+
+	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+	tx_buf = first;
+
+	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+		unsigned int max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;
+
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto dma_error;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_buf, len, size);
+		dma_unmap_addr_set(tx_buf, dma, dma);
+
+		/* align size to end of page */
+		max_data += -dma & (ICE_MAX_READ_REQ_SIZE - 1);
+		tx_desc->buf_addr = cpu_to_le64(dma);
+
+		/* account for data chunks larger than the hardware
+		 * can handle
+		 */
+		while (unlikely(size > ICE_MAX_DATA_PER_TXD)) {
+			tx_desc->cmd_type_offset_bsz =
+				ice_build_ctob(td_cmd, td_offset, max_data,
+					       td_tag);
+
+			tx_desc++;
+			i++;
+
+			if (i == tx_ring->count) {
+				tx_desc = ICE_TX_DESC(tx_ring, 0);
+				i = 0;
+			}
+
+			dma += max_data;
+			size -= max_data;
+
+			max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;
+			tx_desc->buf_addr = cpu_to_le64(dma);
+		}
+
+		if (likely(!data_len))
+			break;
+
+		tx_desc->cmd_type_offset_bsz = ice_build_ctob(td_cmd, td_offset,
+							      size, td_tag);
+
+		tx_desc++;
+		i++;
+
+		if (i == tx_ring->count) {
+			tx_desc = ICE_TX_DESC(tx_ring, 0);
+			i = 0;
+		}
+
+		size = skb_frag_size(frag);
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
+				       DMA_TO_DEVICE);
+
+		tx_buf = &tx_ring->tx_buf[i];
+	}
+
+	/* record bytecount for BQL */
+	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
+
+	/* record SW timestamp if HW timestamp is not available */
+	skb_tx_timestamp(first->skb);
+
+	i++;
+	if (i == tx_ring->count)
+		i = 0;
+
+	/* write last descriptor with RS and EOP bits */
+	td_cmd |= (u64)ICE_TXD_LAST_DESC_CMD;
+	tx_desc->cmd_type_offset_bsz =
+			ice_build_ctob(td_cmd, td_offset, size, td_tag);
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.
+	 *
+	 * We also use this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	tx_ring->next_to_use = i;
+
+	ice_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	/* notify HW of packet */
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more())
+		writel(i, tx_ring->tail);
+
+	return;
+
+dma_error:
+	/* clear DMA mappings for failed tx_buf map */
+	for (;;) {
+		tx_buf = &tx_ring->tx_buf[i];
+		ice_unmap_and_free_tx_buf(tx_ring, tx_buf);
+		if (tx_buf == first)
+			break;
+		if (i == 0)
+			i = tx_ring->count;
+		i--;
+	}
+
+	tx_ring->next_to_use = i;
+}
+
+/**
+ * ice_tx_csum - Enable Tx checksum offloads
+ * @first: pointer to the first descriptor
+ * @off: pointer to struct that holds offload parameters
+ *
+ * Returns 0 or error (negative) if checksum offload can't happen, 1 otherwise.
+ */
+static
+int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
+{
+	u32 l4_len = 0, l3_len = 0, l2_len = 0;
+	struct sk_buff *skb = first->skb;
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	union {
+		struct tcphdr *tcp;
+		unsigned char *hdr;
+	} l4;
+	__be16 frag_off, protocol;
+	unsigned char *exthdr;
+	u32 offset, cmd = 0;
+	u8 l4_proto = 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	ip.hdr = skb_network_header(skb);
+	l4.hdr = skb_transport_header(skb);
+
+	/* compute outer L2 header size */
+	l2_len = ip.hdr - skb->data;
+	offset = (l2_len / 2) << ICE_TX_DESC_LEN_MACLEN_S;
+
+	protocol = vlan_get_protocol(skb);
+
+	if (protocol == htons(ETH_P_IP))
+		first->tx_flags |= ICE_TX_FLAGS_IPV4;
+	else if (protocol == htons(ETH_P_IPV6))
+		first->tx_flags |= ICE_TX_FLAGS_IPV6;
+
+	if (skb->encapsulation) {
+		bool gso_ena = false;
+		u32 tunnel = 0;
+
+		/* define outer network header type */
+		if (first->tx_flags & ICE_TX_FLAGS_IPV4) {
+			tunnel |= (first->tx_flags & ICE_TX_FLAGS_TSO) ?
+				  ICE_TX_CTX_EIPT_IPV4 :
+				  ICE_TX_CTX_EIPT_IPV4_NO_CSUM;
+			l4_proto = ip.v4->protocol;
+		} else if (first->tx_flags & ICE_TX_FLAGS_IPV6) {
+			int ret;
+
+			tunnel |= ICE_TX_CTX_EIPT_IPV6;
+			exthdr = ip.hdr + sizeof(*ip.v6);
+			l4_proto = ip.v6->nexthdr;
+			ret = ipv6_skip_exthdr(skb, exthdr - skb->data,
+					       &l4_proto, &frag_off);
+			if (ret < 0)
+				return -1;
+		}
+
+		/* define outer transport */
+		switch (l4_proto) {
+		case IPPROTO_UDP:
+			tunnel |= ICE_TXD_CTX_UDP_TUNNELING;
+			first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
+			break;
+		case IPPROTO_GRE:
+			tunnel |= ICE_TXD_CTX_GRE_TUNNELING;
+			first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
+			break;
+		case IPPROTO_IPIP:
+		case IPPROTO_IPV6:
+			first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
+			l4.hdr = skb_inner_network_header(skb);
+			break;
+		default:
+			if (first->tx_flags & ICE_TX_FLAGS_TSO)
+				return -1;
+
+			skb_checksum_help(skb);
+			return 0;
+		}
+
+		/* compute outer L3 header size */
+		tunnel |= ((l4.hdr - ip.hdr) / 4) <<
+			  ICE_TXD_CTX_QW0_EIPLEN_S;
+
+		/* switch IP header pointer from outer to inner header */
+		ip.hdr = skb_inner_network_header(skb);
+
+		/* compute tunnel header size */
+		tunnel |= ((ip.hdr - l4.hdr) / 2) <<
+			   ICE_TXD_CTX_QW0_NATLEN_S;
+
+		gso_ena = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL;
+		/* indicate if we need to offload outer UDP header */
+		if ((first->tx_flags & ICE_TX_FLAGS_TSO) && !gso_ena &&
+		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
+			tunnel |= ICE_TXD_CTX_QW0_L4T_CS_M;
+
+		/* record tunnel offload values */
+		off->cd_tunnel_params |= tunnel;
+
+		/* set DTYP=1 to indicate that it's an Tx context descriptor
+		 * in IPsec tunnel mode with Tx offloads in Quad word 1
+		 */
+		off->cd_qw1 |= (u64)ICE_TX_DESC_DTYPE_CTX;
+
+		/* switch L4 header pointer from outer to inner */
+		l4.hdr = skb_inner_transport_header(skb);
+		l4_proto = 0;
+
+		/* reset type as we transition from outer to inner headers */
+		first->tx_flags &= ~(ICE_TX_FLAGS_IPV4 | ICE_TX_FLAGS_IPV6);
+		if (ip.v4->version == 4)
+			first->tx_flags |= ICE_TX_FLAGS_IPV4;
+		if (ip.v6->version == 6)
+			first->tx_flags |= ICE_TX_FLAGS_IPV6;
+	}
+
+	/* Enable IP checksum offloads */
+	if (first->tx_flags & ICE_TX_FLAGS_IPV4) {
+		l4_proto = ip.v4->protocol;
+		/* the stack computes the IP header already, the only time we
+		 * need the hardware to recompute it is in the case of TSO.
+		 */
+		if (first->tx_flags & ICE_TX_FLAGS_TSO)
+			cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM;
+		else
+			cmd |= ICE_TX_DESC_CMD_IIPT_IPV4;
+
+	} else if (first->tx_flags & ICE_TX_FLAGS_IPV6) {
+		cmd |= ICE_TX_DESC_CMD_IIPT_IPV6;
+		exthdr = ip.hdr + sizeof(*ip.v6);
+		l4_proto = ip.v6->nexthdr;
+		if (l4.hdr != exthdr)
+			ipv6_skip_exthdr(skb, exthdr - skb->data, &l4_proto,
+					 &frag_off);
+	} else {
+		return -1;
+	}
+
+	/* compute inner L3 header size */
+	l3_len = l4.hdr - ip.hdr;
+	offset |= (l3_len / 4) << ICE_TX_DESC_LEN_IPLEN_S;
+
+	/* Enable L4 checksum offloads */
+	switch (l4_proto) {
+	case IPPROTO_TCP:
+		/* enable checksum offloads */
+		cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
+		l4_len = l4.tcp->doff;
+		offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
+		break;
+	case IPPROTO_UDP:
+		/* enable UDP checksum offload */
+		cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP;
+		l4_len = (sizeof(struct udphdr) >> 2);
+		offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
+		break;
+	case IPPROTO_SCTP:
+		/* enable SCTP checksum offload */
+		cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP;
+		l4_len = sizeof(struct sctphdr) >> 2;
+		offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
+		break;
+
+	default:
+		if (first->tx_flags & ICE_TX_FLAGS_TSO)
+			return -1;
+		skb_checksum_help(skb);
+		return 0;
+	}
+
+	off->td_cmd |= cmd;
+	off->td_offset |= offset;
+	return 1;
+}
+
+/**
+ * ice_tx_prepare_vlan_flags - prepare generic Tx VLAN tagging flags for HW
+ * @tx_ring: ring to send buffer on
+ * @first: pointer to struct ice_tx_buf
+ *
+ * Checks the skb and set up correspondingly several generic transmit flags
+ * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
+ */
+static void
+ice_tx_prepare_vlan_flags(struct ice_ring *tx_ring, struct ice_tx_buf *first)
+{
+	struct sk_buff *skb = first->skb;
+
+	/* nothing left to do, software offloaded VLAN */
+	if (!skb_vlan_tag_present(skb) && eth_type_vlan(skb->protocol))
+		return;
+
+	/* currently, we always assume 802.1Q for VLAN insertion as VLAN
+	 * insertion for 802.1AD is not supported
+	 */
+	if (skb_vlan_tag_present(skb)) {
+		first->tx_flags |= skb_vlan_tag_get(skb) << ICE_TX_FLAGS_VLAN_S;
+		first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
+	}
+
+	ice_tx_prepare_vlan_flags_dcb(tx_ring, first);
+}
+
+/**
+ * ice_tso - computes mss and TSO length to prepare for TSO
+ * @first: pointer to struct ice_tx_buf
+ * @off: pointer to struct that holds offload parameters
+ *
+ * Returns 0 or error (negative) if TSO can't happen, 1 otherwise.
+ */
+static
+int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
+{
+	struct sk_buff *skb = first->skb;
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	union {
+		struct tcphdr *tcp;
+		struct udphdr *udp;
+		unsigned char *hdr;
+	} l4;
+	u64 cd_mss, cd_tso_len;
+	u32 paylen;
+	u8 l4_start;
+	int err;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	err = skb_cow_head(skb, 0);
+	if (err < 0)
+		return err;
+
+	/* cppcheck-suppress unreadVariable */
+	ip.hdr = skb_network_header(skb);
+	l4.hdr = skb_transport_header(skb);
+
+	/* initialize outer IP header fields */
+	if (ip.v4->version == 4) {
+		ip.v4->tot_len = 0;
+		ip.v4->check = 0;
+	} else {
+		ip.v6->payload_len = 0;
+	}
+
+	if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
+					 SKB_GSO_GRE_CSUM |
+					 SKB_GSO_IPXIP4 |
+					 SKB_GSO_IPXIP6 |
+					 SKB_GSO_UDP_TUNNEL |
+					 SKB_GSO_UDP_TUNNEL_CSUM)) {
+		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
+		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) {
+			l4.udp->len = 0;
+
+			/* determine offset of outer transport header */
+			l4_start = (u8)(l4.hdr - skb->data);
+
+			/* remove payload length from outer checksum */
+			paylen = skb->len - l4_start;
+			csum_replace_by_diff(&l4.udp->check,
+					     (__force __wsum)htonl(paylen));
+		}
+
+		/* reset pointers to inner headers */
+
+		/* cppcheck-suppress unreadVariable */
+		ip.hdr = skb_inner_network_header(skb);
+		l4.hdr = skb_inner_transport_header(skb);
+
+		/* initialize inner IP header fields */
+		if (ip.v4->version == 4) {
+			ip.v4->tot_len = 0;
+			ip.v4->check = 0;
+		} else {
+			ip.v6->payload_len = 0;
+		}
+	}
+
+	/* determine offset of transport header */
+	l4_start = (u8)(l4.hdr - skb->data);
+
+	/* remove payload length from checksum */
+	paylen = skb->len - l4_start;
+
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+		csum_replace_by_diff(&l4.udp->check,
+				     (__force __wsum)htonl(paylen));
+		/* compute length of UDP segmentation header */
+		off->header_len = (u8)sizeof(l4.udp) + l4_start;
+	} else {
+		csum_replace_by_diff(&l4.tcp->check,
+				     (__force __wsum)htonl(paylen));
+		/* compute length of TCP segmentation header */
+		off->header_len = (u8)((l4.tcp->doff * 4) + l4_start);
+	}
+
+	/* update gso_segs and bytecount */
+	first->gso_segs = skb_shinfo(skb)->gso_segs;
+	first->bytecount += (first->gso_segs - 1) * off->header_len;
+
+	cd_tso_len = skb->len - off->header_len;
+	cd_mss = skb_shinfo(skb)->gso_size;
+
+	/* record cdesc_qw1 with TSO parameters */
+	off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+			     (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) |
+			     (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) |
+			     (cd_mss << ICE_TXD_CTX_QW1_MSS_S));
+	first->tx_flags |= ICE_TX_FLAGS_TSO;
+	return 1;
+}
+
+/**
+ * ice_txd_use_count  - estimate the number of descriptors needed for Tx
+ * @size: transmit request size in bytes
+ *
+ * Due to hardware alignment restrictions (4K alignment), we need to
+ * assume that we can have no more than 12K of data per descriptor, even
+ * though each descriptor can take up to 16K - 1 bytes of aligned memory.
+ * Thus, we need to divide by 12K. But division is slow! Instead,
+ * we decompose the operation into shifts and one relatively cheap
+ * multiply operation.
+ *
+ * To divide by 12K, we first divide by 4K, then divide by 3:
+ *     To divide by 4K, shift right by 12 bits
+ *     To divide by 3, multiply by 85, then divide by 256
+ *     (Divide by 256 is done by shifting right by 8 bits)
+ * Finally, we add one to round up. Because 256 isn't an exact multiple of
+ * 3, we'll underestimate near each multiple of 12K. This is actually more
+ * accurate as we have 4K - 1 of wiggle room that we can fit into the last
+ * segment. For our purposes this is accurate out to 1M which is orders of
+ * magnitude greater than our largest possible GSO size.
+ *
+ * This would then be implemented as:
+ *     return (((size >> 12) * 85) >> 8) + ICE_DESCS_FOR_SKB_DATA_PTR;
+ *
+ * Since multiplication and division are commutative, we can reorder
+ * operations into:
+ *     return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR;
+ */
+static unsigned int ice_txd_use_count(unsigned int size)
+{
+	return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR;
+}
+
+/**
+ * ice_xmit_desc_count - calculate number of Tx descriptors needed
+ * @skb: send buffer
+ *
+ * Returns number of data descriptors needed for this skb.
+ */
+static unsigned int ice_xmit_desc_count(struct sk_buff *skb)
+{
+	const skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+	unsigned int count = 0, size = skb_headlen(skb);
+
+	for (;;) {
+		count += ice_txd_use_count(size);
+
+		if (!nr_frags--)
+			break;
+
+		size = skb_frag_size(frag++);
+	}
+
+	return count;
+}
+
+/**
+ * __ice_chk_linearize - Check if there are more than 8 buffers per packet
+ * @skb: send buffer
+ *
+ * Note: This HW can't DMA more than 8 buffers to build a packet on the wire
+ * and so we need to figure out the cases where we need to linearize the skb.
+ *
+ * For TSO we need to count the TSO header and segment payload separately.
+ * As such we need to check cases where we have 7 fragments or more as we
+ * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
+ * the segment payload in the first descriptor, and another 7 for the
+ * fragments.
+ */
+static bool __ice_chk_linearize(struct sk_buff *skb)
+{
+	const skb_frag_t *frag, *stale;
+	int nr_frags, sum;
+
+	/* no need to check if number of frags is less than 7 */
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	if (nr_frags < (ICE_MAX_BUF_TXD - 1))
+		return false;
+
+	/* We need to walk through the list and validate that each group
+	 * of 6 fragments totals at least gso_size.
+	 */
+	nr_frags -= ICE_MAX_BUF_TXD - 2;
+	frag = &skb_shinfo(skb)->frags[0];
+
+	/* Initialize size to the negative value of gso_size minus 1. We
+	 * use this as the worst case scenario in which the frag ahead
+	 * of us only provides one byte which is why we are limited to 6
+	 * descriptors for a single transmit as the header and previous
+	 * fragment are already consuming 2 descriptors.
+	 */
+	sum = 1 - skb_shinfo(skb)->gso_size;
+
+	/* Add size of frags 0 through 4 to create our initial sum */
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+
+	/* Walk through fragments adding latest fragment, testing it, and
+	 * then removing stale fragments from the sum.
+	 */
+	for (stale = &skb_shinfo(skb)->frags[0];; stale++) {
+		int stale_size = skb_frag_size(stale);
+
+		sum += skb_frag_size(frag++);
+
+		/* The stale fragment may present us with a smaller
+		 * descriptor than the actual fragment size. To account
+		 * for that we need to remove all the data on the front and
+		 * figure out what the remainder would be in the last
+		 * descriptor associated with the fragment.
+		 */
+		if (stale_size > ICE_MAX_DATA_PER_TXD) {
+			int align_pad = -(skb_frag_off(stale)) &
+					(ICE_MAX_READ_REQ_SIZE - 1);
+
+			sum -= align_pad;
+			stale_size -= align_pad;
+
+			do {
+				sum -= ICE_MAX_DATA_PER_TXD_ALIGNED;
+				stale_size -= ICE_MAX_DATA_PER_TXD_ALIGNED;
+			} while (stale_size > ICE_MAX_DATA_PER_TXD);
+		}
+
+		/* if sum is negative we failed to make sufficient progress */
+		if (sum < 0)
+			return true;
+
+		if (!nr_frags--)
+			break;
+
+		sum -= stale_size;
+	}
+
+	return false;
+}
+
+/**
+ * ice_chk_linearize - Check if there are more than 8 fragments per packet
+ * @skb:      send buffer
+ * @count:    number of buffers used
+ *
+ * Note: Our HW can't scatter-gather more than 8 fragments to build
+ * a packet on the wire and so we need to figure out the cases where we
+ * need to linearize the skb.
+ */
+static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count)
+{
+	/* Both TSO and single send will work if count is less than 8 */
+	if (likely(count < ICE_MAX_BUF_TXD))
+		return false;
+
+	if (skb_is_gso(skb))
+		return __ice_chk_linearize(skb);
+
+	/* we can support up to 8 data buffers for a single send */
+	return count != ICE_MAX_BUF_TXD;
+}
+
+/**
+ * ice_xmit_frame_ring - Sends buffer on Tx ring
+ * @skb: send buffer
+ * @tx_ring: ring to send buffer on
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+static netdev_tx_t
+ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
+{
+	struct ice_tx_offload_params offload = { 0 };
+	struct ice_vsi *vsi = tx_ring->vsi;
+	struct ice_tx_buf *first;
+	struct ethhdr *eth;
+	unsigned int count;
+	int tso, csum;
+
+	count = ice_xmit_desc_count(skb);
+	if (ice_chk_linearize(skb, count)) {
+		if (__skb_linearize(skb))
+			goto out_drop;
+		count = ice_txd_use_count(skb->len);
+		tx_ring->tx_stats.tx_linearize++;
+	}
+
+	/* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD,
+	 *       + 1 desc for skb_head_len/ICE_MAX_DATA_PER_TXD,
+	 *       + 4 desc gap to avoid the cache line where head is,
+	 *       + 1 desc for context descriptor,
+	 * otherwise try next time
+	 */
+	if (ice_maybe_stop_tx(tx_ring, count + ICE_DESCS_PER_CACHE_LINE +
+			      ICE_DESCS_FOR_CTX_DESC)) {
+		tx_ring->tx_stats.tx_busy++;
+		return NETDEV_TX_BUSY;
+	}
+
+	offload.tx_ring = tx_ring;
+
+	/* record the location of the first descriptor for this packet */
+	first = &tx_ring->tx_buf[tx_ring->next_to_use];
+	first->skb = skb;
+	first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
+	first->gso_segs = 1;
+	first->tx_flags = 0;
+
+	/* prepare the VLAN tagging flags for Tx */
+	ice_tx_prepare_vlan_flags(tx_ring, first);
+
+	/* set up TSO offload */
+	tso = ice_tso(first, &offload);
+	if (tso < 0)
+		goto out_drop;
+
+	/* always set up Tx checksum offload */
+	csum = ice_tx_csum(first, &offload);
+	if (csum < 0)
+		goto out_drop;
+
+	/* allow CONTROL frames egress from main VSI if FW LLDP disabled */
+	eth = (struct ethhdr *)skb_mac_header(skb);
+	if (unlikely((skb->priority == TC_PRIO_CONTROL ||
+		      eth->h_proto == htons(ETH_P_LLDP)) &&
+		     vsi->type == ICE_VSI_PF &&
+		     vsi->port_info->qos_cfg.is_sw_lldp))
+		offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+					ICE_TX_CTX_DESC_SWTCH_UPLINK <<
+					ICE_TXD_CTX_QW1_CMD_S);
+
+	if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) {
+		struct ice_tx_ctx_desc *cdesc;
+		u16 i = tx_ring->next_to_use;
+
+		/* grab the next descriptor */
+		cdesc = ICE_TX_CTX_DESC(tx_ring, i);
+		i++;
+		tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+		/* setup context descriptor */
+		cdesc->tunneling_params = cpu_to_le32(offload.cd_tunnel_params);
+		cdesc->l2tag2 = cpu_to_le16(offload.cd_l2tag2);
+		cdesc->rsvd = cpu_to_le16(0);
+		cdesc->qw1 = cpu_to_le64(offload.cd_qw1);
+	}
+
+	ice_tx_map(tx_ring, first, &offload);
+	return NETDEV_TX_OK;
+
+out_drop:
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+}
+
+/**
+ * ice_start_xmit - Selects the correct VSI and Tx queue to send buffer
+ * @skb: send buffer
+ * @netdev: network interface device structure
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_ring *tx_ring;
+
+	tx_ring = vsi->tx_rings[skb->queue_mapping];
+
+	/* hardware can't handle really short frames, hardware padding works
+	 * beyond this point
+	 */
+	if (skb_put_padto(skb, ICE_MIN_TX_LEN))
+		return NETDEV_TX_OK;
+
+	return ice_xmit_frame_ring(skb, tx_ring);
+}
+
+/**
+ * ice_clean_ctrl_tx_irq - interrupt handler for flow director Tx queue
+ * @tx_ring: tx_ring to clean
+ */
+void ice_clean_ctrl_tx_irq(struct ice_ring *tx_ring)
+{
+	struct ice_vsi *vsi = tx_ring->vsi;
+	s16 i = tx_ring->next_to_clean;
+	int budget = ICE_DFLT_IRQ_WORK;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+
+	tx_buf = &tx_ring->tx_buf[i];
+	tx_desc = ICE_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	do {
+		struct ice_tx_desc *eop_desc = tx_buf->next_to_watch;
+
+		/* if next_to_watch is not set then there is no pending work */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		smp_rmb();
+
+		/* if the descriptor isn't done, no work to do */
+		if (!(eop_desc->cmd_type_offset_bsz &
+		      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buf->next_to_watch = NULL;
+		tx_desc->buf_addr = 0;
+		tx_desc->cmd_type_offset_bsz = 0;
+
+		/* move past filter desc */
+		tx_buf++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buf = tx_ring->tx_buf;
+			tx_desc = ICE_TX_DESC(tx_ring, 0);
+		}
+
+		/* unmap the data header */
+		if (dma_unmap_len(tx_buf, len))
+			dma_unmap_single(tx_ring->dev,
+					 dma_unmap_addr(tx_buf, dma),
+					 dma_unmap_len(tx_buf, len),
+					 DMA_TO_DEVICE);
+		if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT)
+			devm_kfree(tx_ring->dev, tx_buf->raw_buf);
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buf->raw_buf = NULL;
+		tx_buf->tx_flags = 0;
+		tx_buf->next_to_watch = NULL;
+		dma_unmap_len_set(tx_buf, len, 0);
+		tx_desc->buf_addr = 0;
+		tx_desc->cmd_type_offset_bsz = 0;
+
+		/* move past eop_desc for start of next FD desc */
+		tx_buf++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buf = tx_ring->tx_buf;
+			tx_desc = ICE_TX_DESC(tx_ring, 0);
+		}
+
+		budget--;
+	} while (likely(budget));
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+
+	/* re-enable interrupt if needed */
+	ice_irq_dynamic_ena(&vsi->back->hw, vsi, vsi->q_vectors[0]);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
new file mode 100644
index 000000000..eab7ceae9
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -0,0 +1,389 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_TXRX_H_
+#define _ICE_TXRX_H_
+
+#include "ice_type.h"
+
+#define ICE_DFLT_IRQ_WORK	256
+#define ICE_RXBUF_3072		3072
+#define ICE_RXBUF_2048		2048
+#define ICE_RXBUF_1536		1536
+#define ICE_MAX_CHAINED_RX_BUFS	5
+#define ICE_MAX_BUF_TXD		8
+#define ICE_MIN_TX_LEN		17
+
+/* The size limit for a transmit buffer in a descriptor is (16K - 1).
+ * In order to align with the read requests we will align the value to
+ * the nearest 4K which represents our maximum read request size.
+ */
+#define ICE_MAX_READ_REQ_SIZE	4096
+#define ICE_MAX_DATA_PER_TXD	(16 * 1024 - 1)
+#define ICE_MAX_DATA_PER_TXD_ALIGNED \
+	(~(ICE_MAX_READ_REQ_SIZE - 1) & ICE_MAX_DATA_PER_TXD)
+
+#define ICE_RX_BUF_WRITE	16	/* Must be power of 2 */
+#define ICE_MAX_TXQ_PER_TXQG	128
+
+/* Attempt to maximize the headroom available for incoming frames. We use a 2K
+ * buffer for MTUs <= 1500 and need 1536/1534 to store the data for the frame.
+ * This leaves us with 512 bytes of room.  From that we need to deduct the
+ * space needed for the shared info and the padding needed to IP align the
+ * frame.
+ *
+ * Note: For cache line sizes 256 or larger this value is going to end
+ *	 up negative.  In these cases we should fall back to the legacy
+ *	 receive path.
+ */
+#if (PAGE_SIZE < 8192)
+#define ICE_2K_TOO_SMALL_WITH_PADDING \
+	((unsigned int)(NET_SKB_PAD + ICE_RXBUF_1536) > \
+			SKB_WITH_OVERHEAD(ICE_RXBUF_2048))
+
+/**
+ * ice_compute_pad - compute the padding
+ * @rx_buf_len: buffer length
+ *
+ * Figure out the size of half page based on given buffer length and
+ * then subtract the skb_shared_info followed by subtraction of the
+ * actual buffer length; this in turn results in the actual space that
+ * is left for padding usage
+ */
+static inline int ice_compute_pad(int rx_buf_len)
+{
+	int half_page_size;
+
+	half_page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
+	return SKB_WITH_OVERHEAD(half_page_size) - rx_buf_len;
+}
+
+/**
+ * ice_skb_pad - determine the padding that we can supply
+ *
+ * Figure out the right Rx buffer size and based on that calculate the
+ * padding
+ */
+static inline int ice_skb_pad(void)
+{
+	int rx_buf_len;
+
+	/* If a 2K buffer cannot handle a standard Ethernet frame then
+	 * optimize padding for a 3K buffer instead of a 1.5K buffer.
+	 *
+	 * For a 3K buffer we need to add enough padding to allow for
+	 * tailroom due to NET_IP_ALIGN possibly shifting us out of
+	 * cache-line alignment.
+	 */
+	if (ICE_2K_TOO_SMALL_WITH_PADDING)
+		rx_buf_len = ICE_RXBUF_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
+	else
+		rx_buf_len = ICE_RXBUF_1536;
+
+	/* if needed make room for NET_IP_ALIGN */
+	rx_buf_len -= NET_IP_ALIGN;
+
+	return ice_compute_pad(rx_buf_len);
+}
+
+#define ICE_SKB_PAD ice_skb_pad()
+#else
+#define ICE_2K_TOO_SMALL_WITH_PADDING false
+#define ICE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#endif
+
+/* We are assuming that the cache line is always 64 Bytes here for ice.
+ * In order to make sure that is a correct assumption there is a check in probe
+ * to print a warning if the read from GLPCI_CNF2 tells us that the cache line
+ * size is 128 bytes. We do it this way because we do not want to read the
+ * GLPCI_CNF2 register or a variable containing the value on every pass through
+ * the Tx path.
+ */
+#define ICE_CACHE_LINE_BYTES		64
+#define ICE_DESCS_PER_CACHE_LINE	(ICE_CACHE_LINE_BYTES / \
+					 sizeof(struct ice_tx_desc))
+#define ICE_DESCS_FOR_CTX_DESC		1
+#define ICE_DESCS_FOR_SKB_DATA_PTR	1
+/* Tx descriptors needed, worst case */
+#define DESC_NEEDED (MAX_SKB_FRAGS + ICE_DESCS_FOR_CTX_DESC + \
+		     ICE_DESCS_PER_CACHE_LINE + ICE_DESCS_FOR_SKB_DATA_PTR)
+#define ICE_DESC_UNUSED(R)	\
+	(u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+	      (R)->next_to_clean - (R)->next_to_use - 1)
+
+#define ICE_TX_FLAGS_TSO	BIT(0)
+#define ICE_TX_FLAGS_HW_VLAN	BIT(1)
+#define ICE_TX_FLAGS_SW_VLAN	BIT(2)
+/* ICE_TX_FLAGS_DUMMY_PKT is used to mark dummy packets that should be
+ * freed instead of returned like skb packets.
+ */
+#define ICE_TX_FLAGS_DUMMY_PKT	BIT(3)
+#define ICE_TX_FLAGS_IPV4	BIT(5)
+#define ICE_TX_FLAGS_IPV6	BIT(6)
+#define ICE_TX_FLAGS_TUNNEL	BIT(7)
+#define ICE_TX_FLAGS_VLAN_M	0xffff0000
+#define ICE_TX_FLAGS_VLAN_PR_M	0xe0000000
+#define ICE_TX_FLAGS_VLAN_PR_S	29
+#define ICE_TX_FLAGS_VLAN_S	16
+
+#define ICE_XDP_PASS		0
+#define ICE_XDP_CONSUMED	BIT(0)
+#define ICE_XDP_TX		BIT(1)
+#define ICE_XDP_REDIR		BIT(2)
+
+#define ICE_RX_DMA_ATTR \
+	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
+#define ICE_ETH_PKT_HDR_PAD	(ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
+
+#define ICE_TXD_LAST_DESC_CMD (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)
+
+struct ice_tx_buf {
+	struct ice_tx_desc *next_to_watch;
+	union {
+		struct sk_buff *skb;
+		void *raw_buf; /* used for XDP */
+	};
+	unsigned int bytecount;
+	unsigned short gso_segs;
+	u32 tx_flags;
+	DEFINE_DMA_UNMAP_LEN(len);
+	DEFINE_DMA_UNMAP_ADDR(dma);
+};
+
+struct ice_tx_offload_params {
+	u64 cd_qw1;
+	struct ice_ring *tx_ring;
+	u32 td_cmd;
+	u32 td_offset;
+	u32 td_l2tag1;
+	u32 cd_tunnel_params;
+	u16 cd_l2tag2;
+	u8 header_len;
+};
+
+struct ice_rx_buf {
+	union {
+		struct {
+			struct sk_buff *skb;
+			dma_addr_t dma;
+			struct page *page;
+			unsigned int page_offset;
+			u16 pagecnt_bias;
+		};
+		struct {
+			struct xdp_buff *xdp;
+		};
+	};
+};
+
+struct ice_q_stats {
+	u64 pkts;
+	u64 bytes;
+};
+
+struct ice_txq_stats {
+	u64 restart_q;
+	u64 tx_busy;
+	u64 tx_linearize;
+	int prev_pkt; /* negative if no pending Tx descriptors */
+};
+
+struct ice_rxq_stats {
+	u64 non_eop_descs;
+	u64 alloc_page_failed;
+	u64 alloc_buf_failed;
+	u64 gro_dropped; /* GRO returned dropped */
+};
+
+/* this enum matches hardware bits and is meant to be used by DYN_CTLN
+ * registers and QINT registers or more generally anywhere in the manual
+ * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any
+ * register but instead is a special value meaning "don't update" ITR0/1/2.
+ */
+enum ice_dyn_idx_t {
+	ICE_IDX_ITR0 = 0,
+	ICE_IDX_ITR1 = 1,
+	ICE_IDX_ITR2 = 2,
+	ICE_ITR_NONE = 3	/* ITR_NONE must not be used as an index */
+};
+
+/* Header split modes defined by DTYPE field of Rx RLAN context */
+enum ice_rx_dtype {
+	ICE_RX_DTYPE_NO_SPLIT		= 0,
+	ICE_RX_DTYPE_HEADER_SPLIT	= 1,
+	ICE_RX_DTYPE_SPLIT_ALWAYS	= 2,
+};
+
+/* indices into GLINT_ITR registers */
+#define ICE_RX_ITR	ICE_IDX_ITR0
+#define ICE_TX_ITR	ICE_IDX_ITR1
+#define ICE_ITR_8K	124
+#define ICE_ITR_20K	50
+#define ICE_ITR_MAX	8160
+#define ICE_DFLT_TX_ITR	(ICE_ITR_20K | ICE_ITR_DYNAMIC)
+#define ICE_DFLT_RX_ITR	(ICE_ITR_20K | ICE_ITR_DYNAMIC)
+#define ICE_ITR_DYNAMIC	0x8000  /* used as flag for itr_setting */
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & ICE_ITR_DYNAMIC))
+#define ITR_TO_REG(setting)	((setting) & ~ICE_ITR_DYNAMIC)
+#define ICE_ITR_GRAN_S		1	/* ITR granularity is always 2us */
+#define ICE_ITR_GRAN_US		BIT(ICE_ITR_GRAN_S)
+#define ICE_ITR_MASK		0x1FFE	/* ITR register value alignment mask */
+#define ITR_REG_ALIGN(setting)	((setting) & ICE_ITR_MASK)
+
+#define ICE_ITR_ADAPTIVE_MIN_INC	0x0002
+#define ICE_ITR_ADAPTIVE_MIN_USECS	0x0002
+#define ICE_ITR_ADAPTIVE_MAX_USECS	0x00FA
+#define ICE_ITR_ADAPTIVE_LATENCY	0x8000
+#define ICE_ITR_ADAPTIVE_BULK		0x0000
+
+#define ICE_DFLT_INTRL	0
+#define ICE_MAX_INTRL	236
+
+#define ICE_WB_ON_ITR_USECS	2
+#define ICE_IN_WB_ON_ITR_MODE	255
+/* Sets WB_ON_ITR and assumes INTENA bit is already cleared, which allows
+ * setting the MSK_M bit to tell hardware to ignore the INTENA_M bit. Also,
+ * set the write-back latency to the usecs passed in.
+ */
+#define ICE_GLINT_DYN_CTL_WB_ON_ITR(usecs, itr_idx)	\
+	((((usecs) << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S)) & \
+	  GLINT_DYN_CTL_INTERVAL_M) | \
+	 (((itr_idx) << GLINT_DYN_CTL_ITR_INDX_S) & \
+	  GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M | \
+	 GLINT_DYN_CTL_WB_ON_ITR_M)
+
+/* Legacy or Advanced Mode Queue */
+#define ICE_TX_ADVANCED	0
+#define ICE_TX_LEGACY	1
+
+/* descriptor ring, associated with a VSI */
+struct ice_ring {
+	/* CL1 - 1st cacheline starts here */
+	struct ice_ring *next;		/* pointer to next ring in q_vector */
+	void *desc;			/* Descriptor ring memory */
+	struct device *dev;		/* Used for DMA mapping */
+	struct net_device *netdev;	/* netdev ring maps to */
+	struct ice_vsi *vsi;		/* Backreference to associated VSI */
+	struct ice_q_vector *q_vector;	/* Backreference to associated vector */
+	u8 __iomem *tail;
+	union {
+		struct ice_tx_buf *tx_buf;
+		struct ice_rx_buf *rx_buf;
+	};
+	/* CL2 - 2nd cacheline starts here */
+	u16 q_index;			/* Queue number of ring */
+	u16 q_handle;			/* Queue handle per TC */
+
+	u8 ring_active:1;		/* is ring online or not */
+
+	u16 count;			/* Number of descriptors */
+	u16 reg_idx;			/* HW register index of the ring */
+
+	/* used in interrupt processing */
+	u16 next_to_use;
+	u16 next_to_clean;
+	u16 next_to_alloc;
+
+	/* stats structs */
+	struct ice_q_stats	stats;
+	struct u64_stats_sync syncp;
+	union {
+		struct ice_txq_stats tx_stats;
+		struct ice_rxq_stats rx_stats;
+	};
+
+	struct rcu_head rcu;		/* to avoid race on free */
+	struct bpf_prog *xdp_prog;
+	struct xsk_buff_pool *xsk_pool;
+	/* CL3 - 3rd cacheline starts here */
+	struct xdp_rxq_info xdp_rxq;
+	/* CLX - the below items are only accessed infrequently and should be
+	 * in their own cache line if possible
+	 */
+#define ICE_TX_FLAGS_RING_XDP		BIT(0)
+#define ICE_RX_FLAGS_RING_BUILD_SKB	BIT(1)
+	u8 flags;
+	dma_addr_t dma;			/* physical address of ring */
+	unsigned int size;		/* length of descriptor ring in bytes */
+	u32 txq_teid;			/* Added Tx queue TEID */
+	u16 rx_buf_len;
+	u8 dcb_tc;			/* Traffic class of ring */
+} ____cacheline_internodealigned_in_smp;
+
+static inline bool ice_ring_uses_build_skb(struct ice_ring *ring)
+{
+	return !!(ring->flags & ICE_RX_FLAGS_RING_BUILD_SKB);
+}
+
+static inline void ice_set_ring_build_skb_ena(struct ice_ring *ring)
+{
+	ring->flags |= ICE_RX_FLAGS_RING_BUILD_SKB;
+}
+
+static inline void ice_clear_ring_build_skb_ena(struct ice_ring *ring)
+{
+	ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB;
+}
+
+static inline bool ice_ring_is_xdp(struct ice_ring *ring)
+{
+	return !!(ring->flags & ICE_TX_FLAGS_RING_XDP);
+}
+
+struct ice_ring_container {
+	/* head of linked-list of rings */
+	struct ice_ring *ring;
+	unsigned long next_update;	/* jiffies value of next queue update */
+	unsigned int total_bytes;	/* total bytes processed this int */
+	unsigned int total_pkts;	/* total packets processed this int */
+	u16 itr_idx;		/* index in the interrupt vector */
+	u16 target_itr;		/* value in usecs divided by the hw->itr_gran */
+	u16 current_itr;	/* value in usecs divided by the hw->itr_gran */
+	/* high bit set means dynamic ITR, rest is used to store user
+	 * readable ITR value in usecs and must be converted before programming
+	 * to a register.
+	 */
+	u16 itr_setting;
+};
+
+struct ice_coalesce_stored {
+	u16 itr_tx;
+	u16 itr_rx;
+	u8 intrl;
+	u8 tx_valid;
+	u8 rx_valid;
+};
+
+/* iterator for handling rings in ring container */
+#define ice_for_each_ring(pos, head) \
+	for (pos = (head).ring; pos; pos = pos->next)
+
+static inline unsigned int ice_rx_pg_order(struct ice_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring->rx_buf_len > (PAGE_SIZE / 2))
+		return 1;
+#endif
+	return 0;
+}
+
+#define ice_rx_pg_size(_ring) (PAGE_SIZE << ice_rx_pg_order(_ring))
+
+union ice_32b_rx_flex_desc;
+
+bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count);
+netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);
+void ice_clean_tx_ring(struct ice_ring *tx_ring);
+void ice_clean_rx_ring(struct ice_ring *rx_ring);
+int ice_setup_tx_ring(struct ice_ring *tx_ring);
+int ice_setup_rx_ring(struct ice_ring *rx_ring);
+void ice_free_tx_ring(struct ice_ring *tx_ring);
+void ice_free_rx_ring(struct ice_ring *rx_ring);
+int ice_napi_poll(struct napi_struct *napi, int budget);
+int
+ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc,
+		   u8 *raw_packet);
+int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget);
+void ice_clean_ctrl_tx_irq(struct ice_ring *tx_ring);
+#endif /* _ICE_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
new file mode 100644
index 000000000..bc2f4390b
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_txrx_lib.h"
+
+/**
+ * ice_release_rx_desc - Store the new tail and head values
+ * @rx_ring: ring to bump
+ * @val: new head index
+ */
+void ice_release_rx_desc(struct ice_ring *rx_ring, u16 val)
+{
+	u16 prev_ntu = rx_ring->next_to_use & ~0x7;
+
+	rx_ring->next_to_use = val;
+
+	/* update next to alloc since we have filled the ring */
+	rx_ring->next_to_alloc = val;
+
+	/* QRX_TAIL will be updated with any tail value, but hardware ignores
+	 * the lower 3 bits. This makes it so we only bump tail on meaningful
+	 * boundaries. Also, this allows us to bump tail on intervals of 8 up to
+	 * the budget depending on the current traffic load.
+	 */
+	val &= ~0x7;
+	if (prev_ntu != val) {
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch. (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		writel(val, rx_ring->tail);
+	}
+}
+
+/**
+ * ice_ptype_to_htype - get a hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns a hash type to be used by skb_set_hash
+ */
+static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
+{
+	return PKT_HASH_TYPE_NONE;
+}
+
+/**
+ * ice_rx_hash - set the hash value in the skb
+ * @rx_ring: descriptor ring
+ * @rx_desc: specific descriptor
+ * @skb: pointer to current skb
+ * @rx_ptype: the ptype value from the descriptor
+ */
+static void
+ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
+	    struct sk_buff *skb, u8 rx_ptype)
+{
+	struct ice_32b_rx_flex_desc_nic *nic_mdid;
+	u32 hash;
+
+	if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
+		return;
+
+	if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
+		return;
+
+	nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
+	hash = le32_to_cpu(nic_mdid->rss_hash);
+	skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
+}
+
+/**
+ * ice_rx_csum - Indicate in skb if checksum is good
+ * @ring: the ring we care about
+ * @skb: skb currently being received and modified
+ * @rx_desc: the receive descriptor
+ * @ptype: the packet type decoded by hardware
+ *
+ * skb->protocol must be set before this function is called
+ */
+static void
+ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
+	    union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
+{
+	struct ice_rx_ptype_decoded decoded;
+	u16 rx_status0, rx_status1;
+	bool ipv4, ipv6;
+
+	rx_status0 = le16_to_cpu(rx_desc->wb.status_error0);
+	rx_status1 = le16_to_cpu(rx_desc->wb.status_error1);
+
+	decoded = ice_decode_rx_desc_ptype(ptype);
+
+	/* Start with CHECKSUM_NONE and by default csum_level = 0 */
+	skb->ip_summed = CHECKSUM_NONE;
+	skb_checksum_none_assert(skb);
+
+	/* check if Rx checksum is enabled */
+	if (!(ring->netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	/* check if HW has decoded the packet and checksum */
+	if (!(rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
+		return;
+
+	if (!(decoded.known && decoded.outer_ip))
+		return;
+
+	ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+	       (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
+	ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+	       (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
+
+	if (ipv4 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
+				   BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
+		goto checksum_fail;
+
+	if (ipv6 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
+		goto checksum_fail;
+
+	/* check for L4 errors and handle packets that were not able to be
+	 * checksummed due to arrival speed
+	 */
+	if (rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
+		goto checksum_fail;
+
+	/* check for outer UDP checksum error in tunneled packets */
+	if ((rx_status1 & BIT(ICE_RX_FLEX_DESC_STATUS1_NAT_S)) &&
+	    (rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)))
+		goto checksum_fail;
+
+	/* If there is an outer header present that might contain a checksum
+	 * we need to bump the checksum level by 1 to reflect the fact that
+	 * we are indicating we validated the inner checksum.
+	 */
+	if (decoded.tunnel_type >= ICE_RX_PTYPE_TUNNEL_IP_GRENAT)
+		skb->csum_level = 1;
+
+	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
+	switch (decoded.inner_prot) {
+	case ICE_RX_PTYPE_INNER_PROT_TCP:
+	case ICE_RX_PTYPE_INNER_PROT_UDP:
+	case ICE_RX_PTYPE_INNER_PROT_SCTP:
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	default:
+		break;
+	}
+	return;
+
+checksum_fail:
+	ring->vsi->back->hw_csum_rx_error++;
+}
+
+/**
+ * ice_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: Rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ * @ptype: the packet type decoded by hardware
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
+ */
+void
+ice_process_skb_fields(struct ice_ring *rx_ring,
+		       union ice_32b_rx_flex_desc *rx_desc,
+		       struct sk_buff *skb, u8 ptype)
+{
+	ice_rx_hash(rx_ring, rx_desc, skb, ptype);
+
+	/* modifies the skb - consumes the enet header */
+	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+
+	ice_rx_csum(rx_ring, skb, rx_desc, ptype);
+}
+
+/**
+ * ice_receive_skb - Send a completed packet up the stack
+ * @rx_ring: Rx ring in play
+ * @skb: packet to send up
+ * @vlan_tag: VLAN tag for packet
+ *
+ * This function sends the completed packet (via. skb) up the stack using
+ * gro receive functions (with/without VLAN tag)
+ */
+void
+ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
+{
+	if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+	    (vlan_tag & VLAN_VID_MASK))
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
+	if (napi_gro_receive(&rx_ring->q_vector->napi, skb) == GRO_DROP) {
+		/* this is tracked separately to help us debug stack drops */
+		rx_ring->rx_stats.gro_dropped++;
+		netdev_dbg(rx_ring->netdev, "Receive Queue %d: Dropped packet from GRO\n",
+			   rx_ring->q_index);
+	}
+}
+
+/**
+ * ice_xmit_xdp_ring - submit single packet to XDP ring for transmission
+ * @data: packet data pointer
+ * @size: packet data size
+ * @xdp_ring: XDP ring for transmission
+ */
+int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring)
+{
+	u16 i = xdp_ring->next_to_use;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+	dma_addr_t dma;
+
+	if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) {
+		xdp_ring->tx_stats.tx_busy++;
+		return ICE_XDP_CONSUMED;
+	}
+
+	dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(xdp_ring->dev, dma))
+		return ICE_XDP_CONSUMED;
+
+	tx_buf = &xdp_ring->tx_buf[i];
+	tx_buf->bytecount = size;
+	tx_buf->gso_segs = 1;
+	tx_buf->raw_buf = data;
+
+	/* record length, and DMA address */
+	dma_unmap_len_set(tx_buf, len, size);
+	dma_unmap_addr_set(tx_buf, dma, dma);
+
+	tx_desc = ICE_TX_DESC(xdp_ring, i);
+	tx_desc->buf_addr = cpu_to_le64(dma);
+	tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0,
+						      size, 0);
+
+	/* Make certain all of the status bits have been updated
+	 * before next_to_watch is written.
+	 */
+	smp_wmb();
+
+	i++;
+	if (i == xdp_ring->count)
+		i = 0;
+
+	tx_buf->next_to_watch = tx_desc;
+	xdp_ring->next_to_use = i;
+
+	return ICE_XDP_TX;
+}
+
+/**
+ * ice_xmit_xdp_buff - convert an XDP buffer to an XDP frame and send it
+ * @xdp: XDP buffer
+ * @xdp_ring: XDP Tx ring
+ *
+ * Returns negative on failure, 0 on success.
+ */
+int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring)
+{
+	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
+
+	if (unlikely(!xdpf))
+		return ICE_XDP_CONSUMED;
+
+	return ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+}
+
+/**
+ * ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
+ * @rx_ring: Rx ring
+ * @xdp_res: Result of the receive batch
+ *
+ * This function bumps XDP Tx tail and/or flush redirect map, and
+ * should be called when a batch of packets has been processed in the
+ * napi loop.
+ */
+void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res)
+{
+	if (xdp_res & ICE_XDP_REDIR)
+		xdp_do_flush_map();
+
+	if (xdp_res & ICE_XDP_TX) {
+		struct ice_ring *xdp_ring =
+			rx_ring->vsi->xdp_rings[rx_ring->q_index];
+
+		ice_xdp_ring_update_tail(xdp_ring);
+	}
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
new file mode 100644
index 000000000..58ff58f0f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_TXRX_LIB_H_
+#define _ICE_TXRX_LIB_H_
+#include "ice.h"
+
+/**
+ * ice_test_staterr - tests bits in Rx descriptor status and error fields
+ * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static inline bool
+ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
+{
+	return !!(rx_desc->wb.status_error0 & cpu_to_le16(stat_err_bits));
+}
+
+static inline __le64
+ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
+{
+	return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+			   (td_cmd    << ICE_TXD_QW1_CMD_S) |
+			   (td_offset << ICE_TXD_QW1_OFFSET_S) |
+			   ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+			   (td_tag    << ICE_TXD_QW1_L2TAG1_S));
+}
+
+/**
+ * ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register
+ * @xdp_ring: XDP Tx ring
+ *
+ * This function updates the XDP Tx ring tail register.
+ */
+static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring)
+{
+	/* Force memory writes to complete before letting h/w
+	 * know there are new descriptors to fetch.
+	 */
+	wmb();
+	writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
+}
+
+void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res);
+int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring);
+int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring);
+void ice_release_rx_desc(struct ice_ring *rx_ring, u16 val);
+void
+ice_process_skb_fields(struct ice_ring *rx_ring,
+		       union ice_32b_rx_flex_desc *rx_desc,
+		       struct sk_buff *skb, u8 ptype);
+void
+ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
+#endif /* !_ICE_TXRX_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
new file mode 100644
index 000000000..ee3497d25
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -0,0 +1,809 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_TYPE_H_
+#define _ICE_TYPE_H_
+
+#define ICE_BYTES_PER_WORD	2
+#define ICE_BYTES_PER_DWORD	4
+
+#include "ice_status.h"
+#include "ice_hw_autogen.h"
+#include "ice_osdep.h"
+#include "ice_controlq.h"
+#include "ice_lan_tx_rx.h"
+#include "ice_flex_type.h"
+#include "ice_protocol_type.h"
+
+static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc)
+{
+	return test_bit(tc, &bitmap);
+}
+
+static inline u64 round_up_64bit(u64 a, u32 b)
+{
+	return div64_long(((a) + (b) / 2), (b));
+}
+
+static inline u32 ice_round_to_num(u32 N, u32 R)
+{
+	return ((((N) % (R)) < ((R) / 2)) ? (((N) / (R)) * (R)) :
+		((((N) + (R) - 1) / (R)) * (R)));
+}
+
+/* Driver always calls main vsi_handle first */
+#define ICE_MAIN_VSI_HANDLE		0
+
+/* debug masks - set these bits in hw->debug_mask to control output */
+#define ICE_DBG_INIT		BIT_ULL(1)
+#define ICE_DBG_FW_LOG		BIT_ULL(3)
+#define ICE_DBG_LINK		BIT_ULL(4)
+#define ICE_DBG_PHY		BIT_ULL(5)
+#define ICE_DBG_QCTX		BIT_ULL(6)
+#define ICE_DBG_NVM		BIT_ULL(7)
+#define ICE_DBG_LAN		BIT_ULL(8)
+#define ICE_DBG_FLOW		BIT_ULL(9)
+#define ICE_DBG_SW		BIT_ULL(13)
+#define ICE_DBG_SCHED		BIT_ULL(14)
+#define ICE_DBG_PKG		BIT_ULL(16)
+#define ICE_DBG_RES		BIT_ULL(17)
+#define ICE_DBG_AQ_MSG		BIT_ULL(24)
+#define ICE_DBG_AQ_DESC		BIT_ULL(25)
+#define ICE_DBG_AQ_DESC_BUF	BIT_ULL(26)
+#define ICE_DBG_AQ_CMD		BIT_ULL(27)
+#define ICE_DBG_USER		BIT_ULL(31)
+
+enum ice_aq_res_ids {
+	ICE_NVM_RES_ID = 1,
+	ICE_SPD_RES_ID,
+	ICE_CHANGE_LOCK_RES_ID,
+	ICE_GLOBAL_CFG_LOCK_RES_ID
+};
+
+/* FW update timeout definitions are in milliseconds */
+#define ICE_NVM_TIMEOUT			180000
+#define ICE_CHANGE_LOCK_TIMEOUT		1000
+#define ICE_GLOBAL_CFG_LOCK_TIMEOUT	5000
+
+enum ice_aq_res_access_type {
+	ICE_RES_READ = 1,
+	ICE_RES_WRITE
+};
+
+struct ice_driver_ver {
+	u8 major_ver;
+	u8 minor_ver;
+	u8 build_ver;
+	u8 subbuild_ver;
+	u8 driver_string[32];
+};
+
+enum ice_fc_mode {
+	ICE_FC_NONE = 0,
+	ICE_FC_RX_PAUSE,
+	ICE_FC_TX_PAUSE,
+	ICE_FC_FULL,
+	ICE_FC_PFC,
+	ICE_FC_DFLT
+};
+
+enum ice_phy_cache_mode {
+	ICE_FC_MODE = 0,
+	ICE_SPEED_MODE,
+	ICE_FEC_MODE
+};
+
+enum ice_fec_mode {
+	ICE_FEC_NONE = 0,
+	ICE_FEC_RS,
+	ICE_FEC_BASER,
+	ICE_FEC_AUTO
+};
+
+struct ice_phy_cache_mode_data {
+	union {
+		enum ice_fec_mode curr_user_fec_req;
+		enum ice_fc_mode curr_user_fc_req;
+		u16 curr_user_speed_req;
+	} data;
+};
+
+enum ice_set_fc_aq_failures {
+	ICE_SET_FC_AQ_FAIL_NONE = 0,
+	ICE_SET_FC_AQ_FAIL_GET,
+	ICE_SET_FC_AQ_FAIL_SET,
+	ICE_SET_FC_AQ_FAIL_UPDATE
+};
+
+/* Various MAC types */
+enum ice_mac_type {
+	ICE_MAC_UNKNOWN = 0,
+	ICE_MAC_E810,
+	ICE_MAC_GENERIC,
+};
+
+/* Media Types */
+enum ice_media_type {
+	ICE_MEDIA_UNKNOWN = 0,
+	ICE_MEDIA_FIBER,
+	ICE_MEDIA_BASET,
+	ICE_MEDIA_BACKPLANE,
+	ICE_MEDIA_DA,
+};
+
+enum ice_vsi_type {
+	ICE_VSI_PF = 0,
+	ICE_VSI_VF = 1,
+	ICE_VSI_CTRL = 3,	/* equates to ICE_VSI_PF with 1 queue pair */
+	ICE_VSI_LB = 6,
+};
+
+struct ice_link_status {
+	/* Refer to ice_aq_phy_type for bits definition */
+	u64 phy_type_low;
+	u64 phy_type_high;
+	u8 topo_media_conflict;
+	u16 max_frame_size;
+	u16 link_speed;
+	u16 req_speeds;
+	u8 lse_ena;	/* Link Status Event notification */
+	u8 link_info;
+	u8 an_info;
+	u8 ext_info;
+	u8 fec_info;
+	u8 pacing;
+	/* Refer to #define from module_type[ICE_MODULE_TYPE_TOTAL_BYTE] of
+	 * ice_aqc_get_phy_caps structure
+	 */
+	u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE];
+};
+
+/* Different reset sources for which a disable queue AQ call has to be made in
+ * order to clean the Tx scheduler as a part of the reset
+ */
+enum ice_disq_rst_src {
+	ICE_NO_RESET = 0,
+	ICE_VM_RESET,
+	ICE_VF_RESET,
+};
+
+/* PHY info such as phy_type, etc... */
+struct ice_phy_info {
+	struct ice_link_status link_info;
+	struct ice_link_status link_info_old;
+	u64 phy_type_low;
+	u64 phy_type_high;
+	enum ice_media_type media_type;
+	u8 get_link_info;
+	/* Please refer to struct ice_aqc_get_link_status_data to get
+	 * detail of enable bit in curr_user_speed_req
+	 */
+	u16 curr_user_speed_req;
+	enum ice_fec_mode curr_user_fec_req;
+	enum ice_fc_mode curr_user_fc_req;
+	struct ice_aqc_set_phy_cfg_data curr_user_phy_cfg;
+};
+
+/* protocol enumeration for filters */
+enum ice_fltr_ptype {
+	/* NONE - used for undef/error */
+	ICE_FLTR_PTYPE_NONF_NONE = 0,
+	ICE_FLTR_PTYPE_NONF_IPV4_UDP,
+	ICE_FLTR_PTYPE_NONF_IPV4_TCP,
+	ICE_FLTR_PTYPE_NONF_IPV4_SCTP,
+	ICE_FLTR_PTYPE_NONF_IPV4_OTHER,
+	ICE_FLTR_PTYPE_FRAG_IPV4,
+	ICE_FLTR_PTYPE_NONF_IPV6_UDP,
+	ICE_FLTR_PTYPE_NONF_IPV6_TCP,
+	ICE_FLTR_PTYPE_NONF_IPV6_SCTP,
+	ICE_FLTR_PTYPE_NONF_IPV6_OTHER,
+	ICE_FLTR_PTYPE_MAX,
+};
+
+enum ice_fd_hw_seg {
+	ICE_FD_HW_SEG_NON_TUN = 0,
+	ICE_FD_HW_SEG_TUN,
+	ICE_FD_HW_SEG_MAX,
+};
+
+/* 2 VSI = 1 ICE_VSI_PF + 1 ICE_VSI_CTRL */
+#define ICE_MAX_FDIR_VSI_PER_FILTER	2
+
+struct ice_fd_hw_prof {
+	struct ice_flow_seg_info *fdir_seg[ICE_FD_HW_SEG_MAX];
+	int cnt;
+	u64 entry_h[ICE_MAX_FDIR_VSI_PER_FILTER][ICE_FD_HW_SEG_MAX];
+	u16 vsi_h[ICE_MAX_FDIR_VSI_PER_FILTER];
+};
+
+/* Common HW capabilities for SW use */
+struct ice_hw_common_caps {
+	u32 valid_functions;
+	/* DCB capabilities */
+	u32 active_tc_bitmap;
+	u32 maxtc;
+
+	/* Tx/Rx queues */
+	u16 num_rxq;		/* Number/Total Rx queues */
+	u16 rxq_first_id;	/* First queue ID for Rx queues */
+	u16 num_txq;		/* Number/Total Tx queues */
+	u16 txq_first_id;	/* First queue ID for Tx queues */
+
+	/* MSI-X vectors */
+	u16 num_msix_vectors;
+	u16 msix_vector_first_id;
+
+	/* Max MTU for function or device */
+	u16 max_mtu;
+
+	/* Virtualization support */
+	u8 sr_iov_1_1;			/* SR-IOV enabled */
+
+	/* RSS related capabilities */
+	u16 rss_table_size;		/* 512 for PFs and 64 for VFs */
+	u8 rss_table_entry_width;	/* RSS Entry width in bits */
+
+	u8 dcb;
+
+	bool nvm_update_pending_nvm;
+	bool nvm_update_pending_orom;
+	bool nvm_update_pending_netlist;
+#define ICE_NVM_PENDING_NVM_IMAGE		BIT(0)
+#define ICE_NVM_PENDING_OROM			BIT(1)
+#define ICE_NVM_PENDING_NETLIST			BIT(2)
+	bool nvm_unified_update;
+#define ICE_NVM_MGMT_UNIFIED_UPD_SUPPORT	BIT(3)
+};
+
+/* Function specific capabilities */
+struct ice_hw_func_caps {
+	struct ice_hw_common_caps common_cap;
+	u32 num_allocd_vfs;		/* Number of allocated VFs */
+	u32 vf_base_id;			/* Logical ID of the first VF */
+	u32 guar_num_vsi;
+	u32 fd_fltr_guar;		/* Number of filters guaranteed */
+	u32 fd_fltr_best_effort;	/* Number of best effort filters */
+};
+
+/* Device wide capabilities */
+struct ice_hw_dev_caps {
+	struct ice_hw_common_caps common_cap;
+	u32 num_vfs_exposed;		/* Total number of VFs exposed */
+	u32 num_vsi_allocd_to_host;	/* Excluding EMP VSI */
+	u32 num_flow_director_fltr;	/* Number of FD filters available */
+	u32 num_funcs;
+};
+
+/* MAC info */
+struct ice_mac_info {
+	u8 lan_addr[ETH_ALEN];
+	u8 perm_addr[ETH_ALEN];
+};
+
+/* Reset types used to determine which kind of reset was requested. These
+ * defines match what the RESET_TYPE field of the GLGEN_RSTAT register.
+ * ICE_RESET_PFR does not match any RESET_TYPE field in the GLGEN_RSTAT register
+ * because its reset source is different than the other types listed.
+ */
+enum ice_reset_req {
+	ICE_RESET_POR	= 0,
+	ICE_RESET_INVAL	= 0,
+	ICE_RESET_CORER	= 1,
+	ICE_RESET_GLOBR	= 2,
+	ICE_RESET_EMPR	= 3,
+	ICE_RESET_PFR	= 4,
+};
+
+/* Bus parameters */
+struct ice_bus_info {
+	u16 device;
+	u8 func;
+};
+
+/* Flow control (FC) parameters */
+struct ice_fc_info {
+	enum ice_fc_mode current_mode;	/* FC mode in effect */
+	enum ice_fc_mode req_mode;	/* FC mode requested by caller */
+};
+
+/* Option ROM version information */
+struct ice_orom_info {
+	u8 major;			/* Major version of OROM */
+	u8 patch;			/* Patch version of OROM */
+	u16 build;			/* Build version of OROM */
+};
+
+/* NVM Information */
+struct ice_nvm_info {
+	struct ice_orom_info orom;	/* Option ROM version info */
+	u32 eetrack;			/* NVM data version */
+	u16 sr_words;			/* Shadow RAM size in words */
+	u32 flash_size;			/* Size of available flash in bytes */
+	u8 major_ver;			/* major version of NVM package */
+	u8 minor_ver;			/* minor version of dev starter */
+	u8 blank_nvm_mode;		/* is NVM empty (no FW present) */
+};
+
+struct ice_link_default_override_tlv {
+	u8 options;
+#define ICE_LINK_OVERRIDE_OPT_M		0x3F
+#define ICE_LINK_OVERRIDE_STRICT_MODE	BIT(0)
+#define ICE_LINK_OVERRIDE_EPCT_DIS	BIT(1)
+#define ICE_LINK_OVERRIDE_PORT_DIS	BIT(2)
+#define ICE_LINK_OVERRIDE_EN		BIT(3)
+#define ICE_LINK_OVERRIDE_AUTO_LINK_DIS	BIT(4)
+#define ICE_LINK_OVERRIDE_EEE_EN	BIT(5)
+	u8 phy_config;
+#define ICE_LINK_OVERRIDE_PHY_CFG_S	8
+#define ICE_LINK_OVERRIDE_PHY_CFG_M	(0xC3 << ICE_LINK_OVERRIDE_PHY_CFG_S)
+#define ICE_LINK_OVERRIDE_PAUSE_M	0x3
+#define ICE_LINK_OVERRIDE_LESM_EN	BIT(6)
+#define ICE_LINK_OVERRIDE_AUTO_FEC_EN	BIT(7)
+	u8 fec_options;
+#define ICE_LINK_OVERRIDE_FEC_OPT_M	0xFF
+	u8 rsvd1;
+	u64 phy_type_low;
+	u64 phy_type_high;
+};
+
+#define ICE_NVM_VER_LEN	32
+
+/* netlist version information */
+struct ice_netlist_ver_info {
+	u32 major;			/* major high/low */
+	u32 minor;			/* minor high/low */
+	u32 type;			/* type high/low */
+	u32 rev;			/* revision high/low */
+	u32 hash;			/* SHA-1 hash word */
+	u16 cust_ver;			/* customer version */
+};
+
+/* Max number of port to queue branches w.r.t topology */
+#define ICE_MAX_TRAFFIC_CLASS 8
+#define ICE_TXSCHED_MAX_BRANCHES ICE_MAX_TRAFFIC_CLASS
+
+#define ice_for_each_traffic_class(_i)	\
+	for ((_i) = 0; (_i) < ICE_MAX_TRAFFIC_CLASS; (_i)++)
+
+#define ICE_INVAL_TEID 0xFFFFFFFF
+
+struct ice_sched_node {
+	struct ice_sched_node *parent;
+	struct ice_sched_node *sibling; /* next sibling in the same layer */
+	struct ice_sched_node **children;
+	struct ice_aqc_txsched_elem_data info;
+	u32 agg_id;			/* aggregator group ID */
+	u16 vsi_handle;
+	u8 in_use;			/* suspended or in use */
+	u8 tx_sched_layer;		/* Logical Layer (1-9) */
+	u8 num_children;
+	u8 tc_num;
+	u8 owner;
+#define ICE_SCHED_NODE_OWNER_LAN	0
+};
+
+/* Access Macros for Tx Sched Elements data */
+#define ICE_TXSCHED_GET_NODE_TEID(x) le32_to_cpu((x)->info.node_teid)
+
+/* The aggregator type determines if identifier is for a VSI group,
+ * aggregator group, aggregator of queues, or queue group.
+ */
+enum ice_agg_type {
+	ICE_AGG_TYPE_UNKNOWN = 0,
+	ICE_AGG_TYPE_VSI,
+	ICE_AGG_TYPE_AGG, /* aggregator */
+	ICE_AGG_TYPE_Q,
+	ICE_AGG_TYPE_QG
+};
+
+/* Rate limit types */
+enum ice_rl_type {
+	ICE_UNKNOWN_BW = 0,
+	ICE_MIN_BW,		/* for CIR profile */
+	ICE_MAX_BW,		/* for EIR profile */
+	ICE_SHARED_BW		/* for shared profile */
+};
+
+#define ICE_SCHED_MIN_BW		500		/* in Kbps */
+#define ICE_SCHED_MAX_BW		100000000	/* in Kbps */
+#define ICE_SCHED_DFLT_BW		0xFFFFFFFF	/* unlimited */
+#define ICE_SCHED_DFLT_RL_PROF_ID	0
+#define ICE_SCHED_NO_SHARED_RL_PROF_ID	0xFFFF
+#define ICE_SCHED_DFLT_BW_WT		4
+#define ICE_SCHED_INVAL_PROF_ID		0xFFFF
+#define ICE_SCHED_DFLT_BURST_SIZE	(15 * 1024)	/* in bytes (15k) */
+
+ /* Data structure for saving BW information */
+enum ice_bw_type {
+	ICE_BW_TYPE_PRIO,
+	ICE_BW_TYPE_CIR,
+	ICE_BW_TYPE_CIR_WT,
+	ICE_BW_TYPE_EIR,
+	ICE_BW_TYPE_EIR_WT,
+	ICE_BW_TYPE_SHARED,
+	ICE_BW_TYPE_CNT		/* This must be last */
+};
+
+struct ice_bw {
+	u32 bw;
+	u16 bw_alloc;
+};
+
+struct ice_bw_type_info {
+	DECLARE_BITMAP(bw_t_bitmap, ICE_BW_TYPE_CNT);
+	u8 generic;
+	struct ice_bw cir_bw;
+	struct ice_bw eir_bw;
+	u32 shared_bw;
+};
+
+/* VSI queue context structure for given TC */
+struct ice_q_ctx {
+	u16  q_handle;
+	u32  q_teid;
+	/* bw_t_info saves queue BW information */
+	struct ice_bw_type_info bw_t_info;
+};
+
+/* VSI type list entry to locate corresponding VSI/aggregator nodes */
+struct ice_sched_vsi_info {
+	struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS];
+	struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS];
+	struct list_head list_entry;
+	u16 max_lanq[ICE_MAX_TRAFFIC_CLASS];
+};
+
+/* driver defines the policy */
+struct ice_sched_tx_policy {
+	u16 max_num_vsis;
+	u8 max_num_lan_qs_per_tc[ICE_MAX_TRAFFIC_CLASS];
+	u8 rdma_ena;
+};
+
+/* CEE or IEEE 802.1Qaz ETS Configuration data */
+struct ice_dcb_ets_cfg {
+	u8 willing;
+	u8 cbs;
+	u8 maxtcs;
+	u8 prio_table[ICE_MAX_TRAFFIC_CLASS];
+	u8 tcbwtable[ICE_MAX_TRAFFIC_CLASS];
+	u8 tsatable[ICE_MAX_TRAFFIC_CLASS];
+};
+
+/* CEE or IEEE 802.1Qaz PFC Configuration data */
+struct ice_dcb_pfc_cfg {
+	u8 willing;
+	u8 mbc;
+	u8 pfccap;
+	u8 pfcena;
+};
+
+/* CEE or IEEE 802.1Qaz Application Priority data */
+struct ice_dcb_app_priority_table {
+	u16 prot_id;
+	u8 priority;
+	u8 selector;
+};
+
+#define ICE_MAX_USER_PRIORITY	8
+#define ICE_DCBX_MAX_APPS	32
+#define ICE_LLDPDU_SIZE		1500
+#define ICE_TLV_STATUS_OPER	0x1
+#define ICE_TLV_STATUS_SYNC	0x2
+#define ICE_TLV_STATUS_ERR	0x4
+#define ICE_APP_PROT_ID_FCOE	0x8906
+#define ICE_APP_PROT_ID_ISCSI	0x0cbc
+#define ICE_APP_PROT_ID_ISCSI_860 0x035c
+#define ICE_APP_PROT_ID_FIP	0x8914
+#define ICE_APP_SEL_ETHTYPE	0x1
+#define ICE_APP_SEL_TCPIP	0x2
+#define ICE_CEE_APP_SEL_ETHTYPE	0x0
+#define ICE_SR_LINK_DEFAULT_OVERRIDE_PTR	0x134
+#define ICE_CEE_APP_SEL_TCPIP	0x1
+
+struct ice_dcbx_cfg {
+	u32 numapps;
+	u32 tlv_status; /* CEE mode TLV status */
+	struct ice_dcb_ets_cfg etscfg;
+	struct ice_dcb_ets_cfg etsrec;
+	struct ice_dcb_pfc_cfg pfc;
+	struct ice_dcb_app_priority_table app[ICE_DCBX_MAX_APPS];
+	u8 dcbx_mode;
+#define ICE_DCBX_MODE_CEE	0x1
+#define ICE_DCBX_MODE_IEEE	0x2
+	u8 app_mode;
+#define ICE_DCBX_APPS_NON_WILLING	0x1
+};
+
+struct ice_qos_cfg {
+	struct ice_dcbx_cfg local_dcbx_cfg;	/* Oper/Local Cfg */
+	struct ice_dcbx_cfg desired_dcbx_cfg;	/* CEE Desired Cfg */
+	struct ice_dcbx_cfg remote_dcbx_cfg;	/* Peer Cfg */
+	u8 dcbx_status : 3;			/* see ICE_DCBX_STATUS_DIS */
+	u8 is_sw_lldp : 1;
+};
+
+struct ice_port_info {
+	struct ice_sched_node *root;	/* Root Node per Port */
+	struct ice_hw *hw;		/* back pointer to HW instance */
+	u32 last_node_teid;		/* scheduler last node info */
+	u16 sw_id;			/* Initial switch ID belongs to port */
+	u16 pf_vf_num;
+	u8 port_state;
+#define ICE_SCHED_PORT_STATE_INIT	0x0
+#define ICE_SCHED_PORT_STATE_READY	0x1
+	u8 lport;
+#define ICE_LPORT_MASK			0xff
+	u16 dflt_tx_vsi_rule_id;
+	u16 dflt_tx_vsi_num;
+	u16 dflt_rx_vsi_rule_id;
+	u16 dflt_rx_vsi_num;
+	struct ice_fc_info fc;
+	struct ice_mac_info mac;
+	struct ice_phy_info phy;
+	struct mutex sched_lock;	/* protect access to TXSched tree */
+	struct ice_sched_node *
+		sib_head[ICE_MAX_TRAFFIC_CLASS][ICE_AQC_TOPO_MAX_LEVEL_NUM];
+	/* List contain profile ID(s) and other params per layer */
+	struct list_head rl_prof_list[ICE_AQC_TOPO_MAX_LEVEL_NUM];
+	struct ice_qos_cfg qos_cfg;
+	u8 is_vf:1;
+};
+
+struct ice_switch_info {
+	struct list_head vsi_list_map_head;
+	struct ice_sw_recipe *recp_list;
+};
+
+/* FW logging configuration */
+struct ice_fw_log_evnt {
+	u8 cfg : 4;	/* New event enables to configure */
+	u8 cur : 4;	/* Current/active event enables */
+};
+
+struct ice_fw_log_cfg {
+	u8 cq_en : 1;    /* FW logging is enabled via the control queue */
+	u8 uart_en : 1;  /* FW logging is enabled via UART for all PFs */
+	u8 actv_evnts;   /* Cumulation of currently enabled log events */
+
+#define ICE_FW_LOG_EVNT_INFO	(ICE_AQC_FW_LOG_INFO_EN >> ICE_AQC_FW_LOG_EN_S)
+#define ICE_FW_LOG_EVNT_INIT	(ICE_AQC_FW_LOG_INIT_EN >> ICE_AQC_FW_LOG_EN_S)
+#define ICE_FW_LOG_EVNT_FLOW	(ICE_AQC_FW_LOG_FLOW_EN >> ICE_AQC_FW_LOG_EN_S)
+#define ICE_FW_LOG_EVNT_ERR	(ICE_AQC_FW_LOG_ERR_EN >> ICE_AQC_FW_LOG_EN_S)
+	struct ice_fw_log_evnt evnts[ICE_AQC_FW_LOG_ID_MAX];
+};
+
+/* Port hardware description */
+struct ice_hw {
+	u8 __iomem *hw_addr;
+	void *back;
+	struct ice_aqc_layer_props *layer_info;
+	struct ice_port_info *port_info;
+	u64 debug_mask;		/* bitmap for debug mask */
+	enum ice_mac_type mac_type;
+
+	u16 fd_ctr_base;	/* FD counter base index */
+
+	/* pci info */
+	u16 device_id;
+	u16 vendor_id;
+	u16 subsystem_device_id;
+	u16 subsystem_vendor_id;
+	u8 revision_id;
+
+	u8 pf_id;		/* device profile info */
+
+	u16 max_burst_size;	/* driver sets this value */
+
+	/* Tx Scheduler values */
+	u8 num_tx_sched_layers;
+	u8 num_tx_sched_phys_layers;
+	u8 flattened_layers;
+	u8 max_cgds;
+	u8 sw_entry_point_layer;
+	u16 max_children[ICE_AQC_TOPO_MAX_LEVEL_NUM];
+	struct list_head agg_list;	/* lists all aggregator */
+
+	struct ice_vsi_ctx *vsi_ctx[ICE_MAX_VSI];
+	u8 evb_veb;		/* true for VEB, false for VEPA */
+	u8 reset_ongoing;	/* true if HW is in reset, false otherwise */
+	struct ice_bus_info bus;
+	struct ice_nvm_info nvm;
+	struct ice_hw_dev_caps dev_caps;	/* device capabilities */
+	struct ice_hw_func_caps func_caps;	/* function capabilities */
+	struct ice_netlist_ver_info netlist_ver; /* netlist version info */
+
+	struct ice_switch_info *switch_info;	/* switch filter lists */
+
+	/* Control Queue info */
+	struct ice_ctl_q_info adminq;
+	struct ice_ctl_q_info mailboxq;
+
+	u8 api_branch;		/* API branch version */
+	u8 api_maj_ver;		/* API major version */
+	u8 api_min_ver;		/* API minor version */
+	u8 api_patch;		/* API patch version */
+	u8 fw_branch;		/* firmware branch version */
+	u8 fw_maj_ver;		/* firmware major version */
+	u8 fw_min_ver;		/* firmware minor version */
+	u8 fw_patch;		/* firmware patch version */
+	u32 fw_build;		/* firmware build number */
+
+	struct ice_fw_log_cfg fw_log;
+
+/* Device max aggregate bandwidths corresponding to the GL_PWR_MODE_CTL
+ * register. Used for determining the ITR/INTRL granularity during
+ * initialization.
+ */
+#define ICE_MAX_AGG_BW_200G	0x0
+#define ICE_MAX_AGG_BW_100G	0X1
+#define ICE_MAX_AGG_BW_50G	0x2
+#define ICE_MAX_AGG_BW_25G	0x3
+	/* ITR granularity for different speeds */
+#define ICE_ITR_GRAN_ABOVE_25	2
+#define ICE_ITR_GRAN_MAX_25	4
+	/* ITR granularity in 1 us */
+	u8 itr_gran;
+	/* INTRL granularity for different speeds */
+#define ICE_INTRL_GRAN_ABOVE_25	4
+#define ICE_INTRL_GRAN_MAX_25	8
+	/* INTRL granularity in 1 us */
+	u8 intrl_gran;
+
+	u8 ucast_shared;	/* true if VSIs can share unicast addr */
+
+	/* Active package version (currently active) */
+	struct ice_pkg_ver active_pkg_ver;
+	u32 active_track_id;
+	u8 active_pkg_name[ICE_PKG_NAME_SIZE];
+	u8 active_pkg_in_nvm;
+
+	enum ice_aq_err pkg_dwnld_status;
+
+	/* Driver's package ver - (from the Metadata seg) */
+	struct ice_pkg_ver pkg_ver;
+	u8 pkg_name[ICE_PKG_NAME_SIZE];
+
+	/* Driver's Ice package version (from the Ice seg) */
+	struct ice_pkg_ver ice_pkg_ver;
+	u8 ice_pkg_name[ICE_PKG_NAME_SIZE];
+
+	/* Pointer to the ice segment */
+	struct ice_seg *seg;
+
+	/* Pointer to allocated copy of pkg memory */
+	u8 *pkg_copy;
+	u32 pkg_size;
+
+	/* tunneling info */
+	struct mutex tnl_lock;
+	struct ice_tunnel_table tnl;
+
+	struct udp_tunnel_nic_shared udp_tunnel_shared;
+	struct udp_tunnel_nic_info udp_tunnel_nic;
+
+	/* HW block tables */
+	struct ice_blk_info blk[ICE_BLK_COUNT];
+	struct mutex fl_profs_locks[ICE_BLK_COUNT];	/* lock fltr profiles */
+	struct list_head fl_profs[ICE_BLK_COUNT];
+
+	/* Flow Director filter info */
+	int fdir_active_fltr;
+
+	struct mutex fdir_fltr_lock;	/* protect Flow Director */
+	struct list_head fdir_list_head;
+
+	/* Book-keeping of side-band filter count per flow-type.
+	 * This is used to detect and handle input set changes for
+	 * respective flow-type.
+	 */
+	u16 fdir_fltr_cnt[ICE_FLTR_PTYPE_MAX];
+
+	struct ice_fd_hw_prof **fdir_prof;
+	DECLARE_BITMAP(fdir_perfect_fltr, ICE_FLTR_PTYPE_MAX);
+	struct mutex rss_locks;	/* protect RSS configuration */
+	struct list_head rss_list_head;
+};
+
+/* Statistics collected by each port, VSI, VEB, and S-channel */
+struct ice_eth_stats {
+	u64 rx_bytes;			/* gorc */
+	u64 rx_unicast;			/* uprc */
+	u64 rx_multicast;		/* mprc */
+	u64 rx_broadcast;		/* bprc */
+	u64 rx_discards;		/* rdpc */
+	u64 rx_unknown_protocol;	/* rupp */
+	u64 tx_bytes;			/* gotc */
+	u64 tx_unicast;			/* uptc */
+	u64 tx_multicast;		/* mptc */
+	u64 tx_broadcast;		/* bptc */
+	u64 tx_discards;		/* tdpc */
+	u64 tx_errors;			/* tepc */
+};
+
+#define ICE_MAX_UP	8
+
+/* Statistics collected by the MAC */
+struct ice_hw_port_stats {
+	/* eth stats collected by the port */
+	struct ice_eth_stats eth;
+	/* additional port specific stats */
+	u64 tx_dropped_link_down;	/* tdold */
+	u64 crc_errors;			/* crcerrs */
+	u64 illegal_bytes;		/* illerrc */
+	u64 error_bytes;		/* errbc */
+	u64 mac_local_faults;		/* mlfc */
+	u64 mac_remote_faults;		/* mrfc */
+	u64 rx_len_errors;		/* rlec */
+	u64 link_xon_rx;		/* lxonrxc */
+	u64 link_xoff_rx;		/* lxoffrxc */
+	u64 link_xon_tx;		/* lxontxc */
+	u64 link_xoff_tx;		/* lxofftxc */
+	u64 priority_xon_rx[8];		/* pxonrxc[8] */
+	u64 priority_xoff_rx[8];	/* pxoffrxc[8] */
+	u64 priority_xon_tx[8];		/* pxontxc[8] */
+	u64 priority_xoff_tx[8];	/* pxofftxc[8] */
+	u64 priority_xon_2_xoff[8];	/* pxon2offc[8] */
+	u64 rx_size_64;			/* prc64 */
+	u64 rx_size_127;		/* prc127 */
+	u64 rx_size_255;		/* prc255 */
+	u64 rx_size_511;		/* prc511 */
+	u64 rx_size_1023;		/* prc1023 */
+	u64 rx_size_1522;		/* prc1522 */
+	u64 rx_size_big;		/* prc9522 */
+	u64 rx_undersize;		/* ruc */
+	u64 rx_fragments;		/* rfc */
+	u64 rx_oversize;		/* roc */
+	u64 rx_jabber;			/* rjc */
+	u64 tx_size_64;			/* ptc64 */
+	u64 tx_size_127;		/* ptc127 */
+	u64 tx_size_255;		/* ptc255 */
+	u64 tx_size_511;		/* ptc511 */
+	u64 tx_size_1023;		/* ptc1023 */
+	u64 tx_size_1522;		/* ptc1522 */
+	u64 tx_size_big;		/* ptc9522 */
+	/* flow director stats */
+	u32 fd_sb_status;
+	u64 fd_sb_match;
+};
+
+/* Checksum and Shadow RAM pointers */
+#define ICE_SR_BOOT_CFG_PTR		0x132
+#define ICE_SR_NVM_WOL_CFG		0x19
+#define ICE_NVM_OROM_VER_OFF		0x02
+#define ICE_SR_PBA_BLOCK_PTR		0x16
+#define ICE_SR_NVM_DEV_STARTER_VER	0x18
+#define ICE_SR_NVM_EETRACK_LO		0x2D
+#define ICE_SR_NVM_EETRACK_HI		0x2E
+#define ICE_NVM_VER_LO_SHIFT		0
+#define ICE_NVM_VER_LO_MASK		(0xff << ICE_NVM_VER_LO_SHIFT)
+#define ICE_NVM_VER_HI_SHIFT		12
+#define ICE_NVM_VER_HI_MASK		(0xf << ICE_NVM_VER_HI_SHIFT)
+#define ICE_OROM_VER_PATCH_SHIFT	0
+#define ICE_OROM_VER_PATCH_MASK		(0xff << ICE_OROM_VER_PATCH_SHIFT)
+#define ICE_OROM_VER_BUILD_SHIFT	8
+#define ICE_OROM_VER_BUILD_MASK		(0xffff << ICE_OROM_VER_BUILD_SHIFT)
+#define ICE_OROM_VER_SHIFT		24
+#define ICE_OROM_VER_MASK		(0xff << ICE_OROM_VER_SHIFT)
+#define ICE_SR_PFA_PTR			0x40
+#define ICE_SR_1ST_NVM_BANK_PTR		0x42
+#define ICE_SR_1ST_OROM_BANK_PTR	0x44
+#define ICE_SR_NETLIST_BANK_PTR		0x46
+#define ICE_SR_SECTOR_SIZE_IN_WORDS	0x800
+
+/* Link override related */
+#define ICE_SR_PFA_LINK_OVERRIDE_WORDS		10
+#define ICE_SR_PFA_LINK_OVERRIDE_PHY_WORDS	4
+#define ICE_SR_PFA_LINK_OVERRIDE_OFFSET		2
+#define ICE_SR_PFA_LINK_OVERRIDE_FEC_OFFSET	1
+#define ICE_SR_PFA_LINK_OVERRIDE_PHY_OFFSET	2
+#define ICE_FW_API_LINK_OVERRIDE_MAJ		1
+#define ICE_FW_API_LINK_OVERRIDE_MIN		5
+#define ICE_FW_API_LINK_OVERRIDE_PATCH		2
+
+#define ICE_SR_WORDS_IN_1KB		512
+
+/* Hash redirection LUT for VSI - maximum array size */
+#define ICE_VSIQF_HLUT_ARRAY_SIZE	((VSIQF_HLUT_MAX_INDEX + 1) * 4)
+
+#endif /* _ICE_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
new file mode 100644
index 000000000..a980d3378
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -0,0 +1,4167 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_lib.h"
+#include "ice_fltr.h"
+
+/**
+ * ice_validate_vf_id - helper to check if VF ID is valid
+ * @pf: pointer to the PF structure
+ * @vf_id: the ID of the VF to check
+ */
+static int ice_validate_vf_id(struct ice_pf *pf, u16 vf_id)
+{
+	/* vf_id range is only valid for 0-255, and should always be unsigned */
+	if (vf_id >= pf->num_alloc_vfs) {
+		dev_err(ice_pf_to_dev(pf), "Invalid VF ID: %u\n", vf_id);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * ice_check_vf_init - helper to check if VF init complete
+ * @pf: pointer to the PF structure
+ * @vf: the pointer to the VF to check
+ */
+static int ice_check_vf_init(struct ice_pf *pf, struct ice_vf *vf)
+{
+	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+		dev_err(ice_pf_to_dev(pf), "VF ID: %u in reset. Try again.\n",
+			vf->vf_id);
+		return -EBUSY;
+	}
+	return 0;
+}
+
+/**
+ * ice_err_to_virt_err - translate errors for VF return code
+ * @ice_err: error return code
+ */
+static enum virtchnl_status_code ice_err_to_virt_err(enum ice_status ice_err)
+{
+	switch (ice_err) {
+	case ICE_SUCCESS:
+		return VIRTCHNL_STATUS_SUCCESS;
+	case ICE_ERR_BAD_PTR:
+	case ICE_ERR_INVAL_SIZE:
+	case ICE_ERR_DEVICE_NOT_SUPPORTED:
+	case ICE_ERR_PARAM:
+	case ICE_ERR_CFG:
+		return VIRTCHNL_STATUS_ERR_PARAM;
+	case ICE_ERR_NO_MEMORY:
+		return VIRTCHNL_STATUS_ERR_NO_MEMORY;
+	case ICE_ERR_NOT_READY:
+	case ICE_ERR_RESET_FAILED:
+	case ICE_ERR_FW_API_VER:
+	case ICE_ERR_AQ_ERROR:
+	case ICE_ERR_AQ_TIMEOUT:
+	case ICE_ERR_AQ_FULL:
+	case ICE_ERR_AQ_NO_WORK:
+	case ICE_ERR_AQ_EMPTY:
+		return VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+	default:
+		return VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
+	}
+}
+
+/**
+ * ice_vc_vf_broadcast - Broadcast a message to all VFs on PF
+ * @pf: pointer to the PF structure
+ * @v_opcode: operation code
+ * @v_retval: return value
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ */
+static void
+ice_vc_vf_broadcast(struct ice_pf *pf, enum virtchnl_ops v_opcode,
+		    enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
+{
+	struct ice_hw *hw = &pf->hw;
+	unsigned int i;
+
+	ice_for_each_vf(pf, i) {
+		struct ice_vf *vf = &pf->vf[i];
+
+		/* Not all vfs are enabled so skip the ones that are not */
+		if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states) &&
+		    !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+			continue;
+
+		/* Ignore return value on purpose - a given VF may fail, but
+		 * we need to keep going and send to all of them
+		 */
+		ice_aq_send_msg_to_vf(hw, vf->vf_id, v_opcode, v_retval, msg,
+				      msglen, NULL);
+	}
+}
+
+/**
+ * ice_set_pfe_link - Set the link speed/status of the virtchnl_pf_event
+ * @vf: pointer to the VF structure
+ * @pfe: pointer to the virtchnl_pf_event to set link speed/status for
+ * @ice_link_speed: link speed specified by ICE_AQ_LINK_SPEED_*
+ * @link_up: whether or not to set the link up/down
+ */
+static void
+ice_set_pfe_link(struct ice_vf *vf, struct virtchnl_pf_event *pfe,
+		 int ice_link_speed, bool link_up)
+{
+	if (vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED) {
+		pfe->event_data.link_event_adv.link_status = link_up;
+		/* Speed in Mbps */
+		pfe->event_data.link_event_adv.link_speed =
+			ice_conv_link_speed_to_virtchnl(true, ice_link_speed);
+	} else {
+		pfe->event_data.link_event.link_status = link_up;
+		/* Legacy method for virtchnl link speeds */
+		pfe->event_data.link_event.link_speed =
+			(enum virtchnl_link_speed)
+			ice_conv_link_speed_to_virtchnl(false, ice_link_speed);
+	}
+}
+
+/**
+ * ice_vf_has_no_qs_ena - check if the VF has any Rx or Tx queues enabled
+ * @vf: the VF to check
+ *
+ * Returns true if the VF has no Rx and no Tx queues enabled and returns false
+ * otherwise
+ */
+static bool ice_vf_has_no_qs_ena(struct ice_vf *vf)
+{
+	return (!bitmap_weight(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF) &&
+		!bitmap_weight(vf->txq_ena, ICE_MAX_RSS_QS_PER_VF));
+}
+
+/**
+ * ice_is_vf_link_up - check if the VF's link is up
+ * @vf: VF to check if link is up
+ */
+static bool ice_is_vf_link_up(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+
+	if (ice_check_vf_init(pf, vf))
+		return false;
+
+	if (ice_vf_has_no_qs_ena(vf))
+		return false;
+	else if (vf->link_forced)
+		return vf->link_up;
+	else
+		return pf->hw.port_info->phy.link_info.link_info &
+			ICE_AQ_LINK_UP;
+}
+
+/**
+ * ice_vc_notify_vf_link_state - Inform a VF of link status
+ * @vf: pointer to the VF structure
+ *
+ * send a link status message to a single VF
+ */
+static void ice_vc_notify_vf_link_state(struct ice_vf *vf)
+{
+	struct virtchnl_pf_event pfe = { 0 };
+	struct ice_hw *hw = &vf->pf->hw;
+
+	pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
+	pfe.severity = PF_EVENT_SEVERITY_INFO;
+
+	if (ice_is_vf_link_up(vf))
+		ice_set_pfe_link(vf, &pfe,
+				 hw->port_info->phy.link_info.link_speed, true);
+	else
+		ice_set_pfe_link(vf, &pfe, ICE_AQ_LINK_SPEED_UNKNOWN, false);
+
+	ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT,
+			      VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe,
+			      sizeof(pfe), NULL);
+}
+
+/**
+ * ice_vf_invalidate_vsi - invalidate vsi_idx/vsi_num to remove VSI access
+ * @vf: VF to remove access to VSI for
+ */
+static void ice_vf_invalidate_vsi(struct ice_vf *vf)
+{
+	vf->lan_vsi_idx = ICE_NO_VSI;
+	vf->lan_vsi_num = ICE_NO_VSI;
+}
+
+/**
+ * ice_vf_vsi_release - invalidate the VF's VSI after freeing it
+ * @vf: invalidate this VF's VSI after freeing it
+ */
+static void ice_vf_vsi_release(struct ice_vf *vf)
+{
+	ice_vsi_release(vf->pf->vsi[vf->lan_vsi_idx]);
+	ice_vf_invalidate_vsi(vf);
+}
+
+/**
+ * ice_free_vf_res - Free a VF's resources
+ * @vf: pointer to the VF info
+ */
+static void ice_free_vf_res(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	int i, last_vector_idx;
+
+	/* First, disable VF's configuration API to prevent OS from
+	 * accessing the VF's VSI after it's freed or invalidated.
+	 */
+	clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
+
+	/* free VSI and disconnect it from the parent uplink */
+	if (vf->lan_vsi_idx != ICE_NO_VSI) {
+		ice_vf_vsi_release(vf);
+		vf->num_mac = 0;
+	}
+
+	last_vector_idx = vf->first_vector_idx + pf->num_msix_per_vf - 1;
+
+	/* clear VF MDD event information */
+	memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events));
+	memset(&vf->mdd_rx_events, 0, sizeof(vf->mdd_rx_events));
+
+	/* Disable interrupts so that VF starts in a known state */
+	for (i = vf->first_vector_idx; i <= last_vector_idx; i++) {
+		wr32(&pf->hw, GLINT_DYN_CTL(i), GLINT_DYN_CTL_CLEARPBA_M);
+		ice_flush(&pf->hw);
+	}
+	/* reset some of the state variables keeping track of the resources */
+	clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
+	clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
+}
+
+/**
+ * ice_dis_vf_mappings
+ * @vf: pointer to the VF structure
+ */
+static void ice_dis_vf_mappings(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	struct device *dev;
+	int first, last, v;
+	struct ice_hw *hw;
+
+	hw = &pf->hw;
+	vsi = pf->vsi[vf->lan_vsi_idx];
+
+	dev = ice_pf_to_dev(pf);
+	wr32(hw, VPINT_ALLOC(vf->vf_id), 0);
+	wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0);
+
+	first = vf->first_vector_idx;
+	last = first + pf->num_msix_per_vf - 1;
+	for (v = first; v <= last; v++) {
+		u32 reg;
+
+		reg = (((1 << GLINT_VECT2FUNC_IS_PF_S) &
+			GLINT_VECT2FUNC_IS_PF_M) |
+		       ((hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) &
+			GLINT_VECT2FUNC_PF_NUM_M));
+		wr32(hw, GLINT_VECT2FUNC(v), reg);
+	}
+
+	if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG)
+		wr32(hw, VPLAN_TX_QBASE(vf->vf_id), 0);
+	else
+		dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n");
+
+	if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG)
+		wr32(hw, VPLAN_RX_QBASE(vf->vf_id), 0);
+	else
+		dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n");
+}
+
+/**
+ * ice_sriov_free_msix_res - Reset/free any used MSIX resources
+ * @pf: pointer to the PF structure
+ *
+ * Since no MSIX entries are taken from the pf->irq_tracker then just clear
+ * the pf->sriov_base_vector.
+ *
+ * Returns 0 on success, and -EINVAL on error.
+ */
+static int ice_sriov_free_msix_res(struct ice_pf *pf)
+{
+	struct ice_res_tracker *res;
+
+	if (!pf)
+		return -EINVAL;
+
+	res = pf->irq_tracker;
+	if (!res)
+		return -EINVAL;
+
+	/* give back irq_tracker resources used */
+	WARN_ON(pf->sriov_base_vector < res->num_entries);
+
+	pf->sriov_base_vector = 0;
+
+	return 0;
+}
+
+/**
+ * ice_set_vf_state_qs_dis - Set VF queues state to disabled
+ * @vf: pointer to the VF structure
+ */
+void ice_set_vf_state_qs_dis(struct ice_vf *vf)
+{
+	/* Clear Rx/Tx enabled queues flag */
+	bitmap_zero(vf->txq_ena, ICE_MAX_RSS_QS_PER_VF);
+	bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF);
+	clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
+}
+
+/**
+ * ice_dis_vf_qs - Disable the VF queues
+ * @vf: pointer to the VF structure
+ */
+static void ice_dis_vf_qs(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+
+	ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);
+	ice_vsi_stop_all_rx_rings(vsi);
+	ice_set_vf_state_qs_dis(vf);
+}
+
+/**
+ * ice_free_vfs - Free all VFs
+ * @pf: pointer to the PF structure
+ */
+void ice_free_vfs(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	unsigned int tmp, i;
+
+	if (!pf->vf)
+		return;
+
+	while (test_and_set_bit(__ICE_VF_DIS, pf->state))
+		usleep_range(1000, 2000);
+
+	/* Disable IOV before freeing resources. This lets any VF drivers
+	 * running in the host get themselves cleaned up before we yank
+	 * the carpet out from underneath their feet.
+	 */
+	if (!pci_vfs_assigned(pf->pdev))
+		pci_disable_sriov(pf->pdev);
+	else
+		dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n");
+
+	tmp = pf->num_alloc_vfs;
+	pf->num_qps_per_vf = 0;
+	pf->num_alloc_vfs = 0;
+	for (i = 0; i < tmp; i++) {
+		struct ice_vf *vf = &pf->vf[i];
+
+		mutex_lock(&vf->cfg_lock);
+
+		ice_dis_vf_qs(vf);
+
+		if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+			/* disable VF qp mappings and set VF disable state */
+			ice_dis_vf_mappings(vf);
+			set_bit(ICE_VF_STATE_DIS, vf->vf_states);
+			ice_free_vf_res(vf);
+		}
+
+		mutex_unlock(&vf->cfg_lock);
+
+		mutex_destroy(&vf->cfg_lock);
+	}
+
+	if (ice_sriov_free_msix_res(pf))
+		dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n");
+
+	devm_kfree(dev, pf->vf);
+	pf->vf = NULL;
+
+	/* This check is for when the driver is unloaded while VFs are
+	 * assigned. Setting the number of VFs to 0 through sysfs is caught
+	 * before this function ever gets called.
+	 */
+	if (!pci_vfs_assigned(pf->pdev)) {
+		unsigned int vf_id;
+
+		/* Acknowledge VFLR for all VFs. Without this, VFs will fail to
+		 * work correctly when SR-IOV gets re-enabled.
+		 */
+		for (vf_id = 0; vf_id < tmp; vf_id++) {
+			u32 reg_idx, bit_idx;
+
+			reg_idx = (hw->func_caps.vf_base_id + vf_id) / 32;
+			bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32;
+			wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx));
+		}
+	}
+	clear_bit(__ICE_VF_DIS, pf->state);
+	clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
+}
+
+/**
+ * ice_trigger_vf_reset - Reset a VF on HW
+ * @vf: pointer to the VF structure
+ * @is_vflr: true if VFLR was issued, false if not
+ * @is_pfr: true if the reset was triggered due to a previous PFR
+ *
+ * Trigger hardware to start a reset for a particular VF. Expects the caller
+ * to wait the proper amount of time to allow hardware to reset the VF before
+ * it cleans up and restores VF functionality.
+ */
+static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
+{
+	struct ice_pf *pf = vf->pf;
+	u32 reg, reg_idx, bit_idx;
+	unsigned int vf_abs_id, i;
+	struct device *dev;
+	struct ice_hw *hw;
+
+	dev = ice_pf_to_dev(pf);
+	hw = &pf->hw;
+	vf_abs_id = vf->vf_id + hw->func_caps.vf_base_id;
+
+	/* Inform VF that it is no longer active, as a warning */
+	clear_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
+
+	/* Disable VF's configuration API during reset. The flag is re-enabled
+	 * when it's safe again to access VF's VSI.
+	 */
+	clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
+
+	/* VF_MBX_ARQLEN and VF_MBX_ATQLEN are cleared by PFR, so the driver
+	 * needs to clear them in the case of VFR/VFLR. If this is done for
+	 * PFR, it can mess up VF resets because the VF driver may already
+	 * have started cleanup by the time we get here.
+	 */
+	if (!is_pfr) {
+		wr32(hw, VF_MBX_ARQLEN(vf->vf_id), 0);
+		wr32(hw, VF_MBX_ATQLEN(vf->vf_id), 0);
+	}
+
+	/* In the case of a VFLR, the HW has already reset the VF and we
+	 * just need to clean up, so don't hit the VFRTRIG register.
+	 */
+	if (!is_vflr) {
+		/* reset VF using VPGEN_VFRTRIG reg */
+		reg = rd32(hw, VPGEN_VFRTRIG(vf->vf_id));
+		reg |= VPGEN_VFRTRIG_VFSWR_M;
+		wr32(hw, VPGEN_VFRTRIG(vf->vf_id), reg);
+	}
+	/* clear the VFLR bit in GLGEN_VFLRSTAT */
+	reg_idx = (vf_abs_id) / 32;
+	bit_idx = (vf_abs_id) % 32;
+	wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx));
+	ice_flush(hw);
+
+	wr32(hw, PF_PCI_CIAA,
+	     VF_DEVICE_STATUS | (vf_abs_id << PF_PCI_CIAA_VF_NUM_S));
+	for (i = 0; i < ICE_PCI_CIAD_WAIT_COUNT; i++) {
+		reg = rd32(hw, PF_PCI_CIAD);
+		/* no transactions pending so stop polling */
+		if ((reg & VF_TRANS_PENDING_M) == 0)
+			break;
+
+		dev_err(dev, "VF %u PCI transactions stuck\n", vf->vf_id);
+		udelay(ICE_PCI_CIAD_WAIT_DELAY_US);
+	}
+}
+
+/**
+ * ice_vsi_manage_pvid - Enable or disable port VLAN for VSI
+ * @vsi: the VSI to update
+ * @pvid_info: VLAN ID and QoS used to set the PVID VSI context field
+ * @enable: true for enable PVID false for disable
+ */
+static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 pvid_info, bool enable)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_aqc_vsi_props *info;
+	struct ice_vsi_ctx *ctxt;
+	enum ice_status status;
+	int ret = 0;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	ctxt->info = vsi->info;
+	info = &ctxt->info;
+	if (enable) {
+		info->vlan_flags = ICE_AQ_VSI_VLAN_MODE_UNTAGGED |
+			ICE_AQ_VSI_PVLAN_INSERT_PVID |
+			ICE_AQ_VSI_VLAN_EMOD_STR;
+		info->sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+	} else {
+		info->vlan_flags = ICE_AQ_VSI_VLAN_EMOD_NOTHING |
+			ICE_AQ_VSI_VLAN_MODE_ALL;
+		info->sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+	}
+
+	info->pvid = cpu_to_le16(pvid_info);
+	info->valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
+					   ICE_AQ_VSI_PROP_SW_VALID);
+
+	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (status) {
+		dev_info(ice_hw_to_dev(hw), "update VSI for port VLAN failed, err %s aq_err %s\n",
+			 ice_stat_str(status),
+			 ice_aq_str(hw->adminq.sq_last_status));
+		ret = -EIO;
+		goto out;
+	}
+
+	vsi->info.vlan_flags = info->vlan_flags;
+	vsi->info.sw_flags2 = info->sw_flags2;
+	vsi->info.pvid = info->pvid;
+out:
+	kfree(ctxt);
+	return ret;
+}
+
+/**
+ * ice_vf_get_port_info - Get the VF's port info structure
+ * @vf: VF used to get the port info structure for
+ */
+static struct ice_port_info *ice_vf_get_port_info(struct ice_vf *vf)
+{
+	return vf->pf->hw.port_info;
+}
+
+/**
+ * ice_vf_vsi_setup - Set up a VF VSI
+ * @vf: VF to setup VSI for
+ *
+ * Returns pointer to the successfully allocated VSI struct on success,
+ * otherwise returns NULL on failure.
+ */
+static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf)
+{
+	struct ice_port_info *pi = ice_vf_get_port_info(vf);
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf->vf_id);
+
+	if (!vsi) {
+		dev_err(ice_pf_to_dev(pf), "Failed to create VF VSI\n");
+		ice_vf_invalidate_vsi(vf);
+		return NULL;
+	}
+
+	vf->lan_vsi_idx = vsi->idx;
+	vf->lan_vsi_num = vsi->vsi_num;
+
+	return vsi;
+}
+
+/**
+ * ice_calc_vf_first_vector_idx - Calculate MSIX vector index in the PF space
+ * @pf: pointer to PF structure
+ * @vf: pointer to VF that the first MSIX vector index is being calculated for
+ *
+ * This returns the first MSIX vector index in PF space that is used by this VF.
+ * This index is used when accessing PF relative registers such as
+ * GLINT_VECT2FUNC and GLINT_DYN_CTL.
+ * This will always be the OICR index in the AVF driver so any functionality
+ * using vf->first_vector_idx for queue configuration will have to increment by
+ * 1 to avoid meddling with the OICR index.
+ */
+static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf)
+{
+	return pf->sriov_base_vector + vf->vf_id * pf->num_msix_per_vf;
+}
+
+/**
+ * ice_vf_rebuild_host_vlan_cfg - add VLAN 0 filter or rebuild the Port VLAN
+ * @vf: VF to add MAC filters for
+ *
+ * Called after a VF VSI has been re-added/rebuilt during reset. The PF driver
+ * always re-adds either a VLAN 0 or port VLAN based filter after reset.
+ */
+static int ice_vf_rebuild_host_vlan_cfg(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	u16 vlan_id = 0;
+	int err;
+
+	if (vf->port_vlan_info) {
+		err = ice_vsi_manage_pvid(vsi, vf->port_vlan_info, true);
+		if (err) {
+			dev_err(dev, "failed to configure port VLAN via VSI parameters for VF %u, error %d\n",
+				vf->vf_id, err);
+			return err;
+		}
+
+		vlan_id = vf->port_vlan_info & VLAN_VID_MASK;
+	}
+
+	/* vlan_id will either be 0 or the port VLAN number */
+	err = ice_vsi_add_vlan(vsi, vlan_id, ICE_FWD_TO_VSI);
+	if (err) {
+		dev_err(dev, "failed to add %s VLAN %u filter for VF %u, error %d\n",
+			vf->port_vlan_info ? "port" : "", vlan_id, vf->vf_id,
+			err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vf_rebuild_host_mac_cfg - add broadcast and the VF's perm_addr/LAA
+ * @vf: VF to add MAC filters for
+ *
+ * Called after a VF VSI has been re-added/rebuilt during reset. The PF driver
+ * always re-adds a broadcast filter and the VF's perm_addr/LAA after reset.
+ */
+static int ice_vf_rebuild_host_mac_cfg(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	enum ice_status status;
+	u8 broadcast[ETH_ALEN];
+
+	eth_broadcast_addr(broadcast);
+	status = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI);
+	if (status) {
+		dev_err(dev, "failed to add broadcast MAC filter for VF %u, error %s\n",
+			vf->vf_id, ice_stat_str(status));
+		return ice_status_to_errno(status);
+	}
+
+	vf->num_mac++;
+
+	if (is_valid_ether_addr(vf->dflt_lan_addr.addr)) {
+		status = ice_fltr_add_mac(vsi, vf->dflt_lan_addr.addr,
+					  ICE_FWD_TO_VSI);
+		if (status) {
+			dev_err(dev, "failed to add default unicast MAC filter %pM for VF %u, error %s\n",
+				&vf->dflt_lan_addr.addr[0], vf->vf_id,
+				ice_stat_str(status));
+			return ice_status_to_errno(status);
+		}
+		vf->num_mac++;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vf_set_host_trust_cfg - set trust setting based on pre-reset value
+ * @vf: VF to configure trust setting for
+ */
+static void ice_vf_set_host_trust_cfg(struct ice_vf *vf)
+{
+	if (vf->trusted)
+		set_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
+	else
+		clear_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
+}
+
+/**
+ * ice_ena_vf_msix_mappings - enable VF MSIX mappings in hardware
+ * @vf: VF to enable MSIX mappings for
+ *
+ * Some of the registers need to be indexed/configured using hardware global
+ * device values and other registers need 0-based values, which represent PF
+ * based values.
+ */
+static void ice_ena_vf_msix_mappings(struct ice_vf *vf)
+{
+	int device_based_first_msix, device_based_last_msix;
+	int pf_based_first_msix, pf_based_last_msix, v;
+	struct ice_pf *pf = vf->pf;
+	int device_based_vf_id;
+	struct ice_hw *hw;
+	u32 reg;
+
+	hw = &pf->hw;
+	pf_based_first_msix = vf->first_vector_idx;
+	pf_based_last_msix = (pf_based_first_msix + pf->num_msix_per_vf) - 1;
+
+	device_based_first_msix = pf_based_first_msix +
+		pf->hw.func_caps.common_cap.msix_vector_first_id;
+	device_based_last_msix =
+		(device_based_first_msix + pf->num_msix_per_vf) - 1;
+	device_based_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
+
+	reg = (((device_based_first_msix << VPINT_ALLOC_FIRST_S) &
+		VPINT_ALLOC_FIRST_M) |
+	       ((device_based_last_msix << VPINT_ALLOC_LAST_S) &
+		VPINT_ALLOC_LAST_M) | VPINT_ALLOC_VALID_M);
+	wr32(hw, VPINT_ALLOC(vf->vf_id), reg);
+
+	reg = (((device_based_first_msix << VPINT_ALLOC_PCI_FIRST_S)
+		 & VPINT_ALLOC_PCI_FIRST_M) |
+	       ((device_based_last_msix << VPINT_ALLOC_PCI_LAST_S) &
+		VPINT_ALLOC_PCI_LAST_M) | VPINT_ALLOC_PCI_VALID_M);
+	wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), reg);
+
+	/* map the interrupts to its functions */
+	for (v = pf_based_first_msix; v <= pf_based_last_msix; v++) {
+		reg = (((device_based_vf_id << GLINT_VECT2FUNC_VF_NUM_S) &
+			GLINT_VECT2FUNC_VF_NUM_M) |
+		       ((hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) &
+			GLINT_VECT2FUNC_PF_NUM_M));
+		wr32(hw, GLINT_VECT2FUNC(v), reg);
+	}
+
+	/* Map mailbox interrupt to VF MSI-X vector 0 */
+	wr32(hw, VPINT_MBX_CTL(device_based_vf_id), VPINT_MBX_CTL_CAUSE_ENA_M);
+}
+
+/**
+ * ice_ena_vf_q_mappings - enable Rx/Tx queue mappings for a VF
+ * @vf: VF to enable the mappings for
+ * @max_txq: max Tx queues allowed on the VF's VSI
+ * @max_rxq: max Rx queues allowed on the VF's VSI
+ */
+static void ice_ena_vf_q_mappings(struct ice_vf *vf, u16 max_txq, u16 max_rxq)
+{
+	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_hw *hw = &vf->pf->hw;
+	u32 reg;
+
+	/* set regardless of mapping mode */
+	wr32(hw, VPLAN_TXQ_MAPENA(vf->vf_id), VPLAN_TXQ_MAPENA_TX_ENA_M);
+
+	/* VF Tx queues allocation */
+	if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG) {
+		/* set the VF PF Tx queue range
+		 * VFNUMQ value should be set to (number of queues - 1). A value
+		 * of 0 means 1 queue and a value of 255 means 256 queues
+		 */
+		reg = (((vsi->txq_map[0] << VPLAN_TX_QBASE_VFFIRSTQ_S) &
+			VPLAN_TX_QBASE_VFFIRSTQ_M) |
+		       (((max_txq - 1) << VPLAN_TX_QBASE_VFNUMQ_S) &
+			VPLAN_TX_QBASE_VFNUMQ_M));
+		wr32(hw, VPLAN_TX_QBASE(vf->vf_id), reg);
+	} else {
+		dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n");
+	}
+
+	/* set regardless of mapping mode */
+	wr32(hw, VPLAN_RXQ_MAPENA(vf->vf_id), VPLAN_RXQ_MAPENA_RX_ENA_M);
+
+	/* VF Rx queues allocation */
+	if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG) {
+		/* set the VF PF Rx queue range
+		 * VFNUMQ value should be set to (number of queues - 1). A value
+		 * of 0 means 1 queue and a value of 255 means 256 queues
+		 */
+		reg = (((vsi->rxq_map[0] << VPLAN_RX_QBASE_VFFIRSTQ_S) &
+			VPLAN_RX_QBASE_VFFIRSTQ_M) |
+		       (((max_rxq - 1) << VPLAN_RX_QBASE_VFNUMQ_S) &
+			VPLAN_RX_QBASE_VFNUMQ_M));
+		wr32(hw, VPLAN_RX_QBASE(vf->vf_id), reg);
+	} else {
+		dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n");
+	}
+}
+
+/**
+ * ice_ena_vf_mappings - enable VF MSIX and queue mapping
+ * @vf: pointer to the VF structure
+ */
+static void ice_ena_vf_mappings(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+
+	ice_ena_vf_msix_mappings(vf);
+	ice_ena_vf_q_mappings(vf, vsi->alloc_txq, vsi->alloc_rxq);
+}
+
+/**
+ * ice_determine_res
+ * @pf: pointer to the PF structure
+ * @avail_res: available resources in the PF structure
+ * @max_res: maximum resources that can be given per VF
+ * @min_res: minimum resources that can be given per VF
+ *
+ * Returns non-zero value if resources (queues/vectors) are available or
+ * returns zero if PF cannot accommodate for all num_alloc_vfs.
+ */
+static int
+ice_determine_res(struct ice_pf *pf, u16 avail_res, u16 max_res, u16 min_res)
+{
+	bool checked_min_res = false;
+	int res;
+
+	/* start by checking if PF can assign max number of resources for
+	 * all num_alloc_vfs.
+	 * if yes, return number per VF
+	 * If no, divide by 2 and roundup, check again
+	 * repeat the loop till we reach a point where even minimum resources
+	 * are not available, in that case return 0
+	 */
+	res = max_res;
+	while ((res >= min_res) && !checked_min_res) {
+		int num_all_res;
+
+		num_all_res = pf->num_alloc_vfs * res;
+		if (num_all_res <= avail_res)
+			return res;
+
+		if (res == min_res)
+			checked_min_res = true;
+
+		res = DIV_ROUND_UP(res, 2);
+	}
+	return 0;
+}
+
+/**
+ * ice_calc_vf_reg_idx - Calculate the VF's register index in the PF space
+ * @vf: VF to calculate the register index for
+ * @q_vector: a q_vector associated to the VF
+ */
+int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector)
+{
+	struct ice_pf *pf;
+
+	if (!vf || !q_vector)
+		return -EINVAL;
+
+	pf = vf->pf;
+
+	/* always add one to account for the OICR being the first MSIX */
+	return pf->sriov_base_vector + pf->num_msix_per_vf * vf->vf_id +
+		q_vector->v_idx + 1;
+}
+
+/**
+ * ice_get_max_valid_res_idx - Get the max valid resource index
+ * @res: pointer to the resource to find the max valid index for
+ *
+ * Start from the end of the ice_res_tracker and return right when we find the
+ * first res->list entry with the ICE_RES_VALID_BIT set. This function is only
+ * valid for SR-IOV because it is the only consumer that manipulates the
+ * res->end and this is always called when res->end is set to res->num_entries.
+ */
+static int ice_get_max_valid_res_idx(struct ice_res_tracker *res)
+{
+	int i;
+
+	if (!res)
+		return -EINVAL;
+
+	for (i = res->num_entries - 1; i >= 0; i--)
+		if (res->list[i] & ICE_RES_VALID_BIT)
+			return i;
+
+	return 0;
+}
+
+/**
+ * ice_sriov_set_msix_res - Set any used MSIX resources
+ * @pf: pointer to PF structure
+ * @num_msix_needed: number of MSIX vectors needed for all SR-IOV VFs
+ *
+ * This function allows SR-IOV resources to be taken from the end of the PF's
+ * allowed HW MSIX vectors so that the irq_tracker will not be affected. We
+ * just set the pf->sriov_base_vector and return success.
+ *
+ * If there are not enough resources available, return an error. This should
+ * always be caught by ice_set_per_vf_res().
+ *
+ * Return 0 on success, and -EINVAL when there are not enough MSIX vectors
+ * in the PF's space available for SR-IOV.
+ */
+static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed)
+{
+	u16 total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors;
+	int vectors_used = pf->irq_tracker->num_entries;
+	int sriov_base_vector;
+
+	sriov_base_vector = total_vectors - num_msix_needed;
+
+	/* make sure we only grab irq_tracker entries from the list end and
+	 * that we have enough available MSIX vectors
+	 */
+	if (sriov_base_vector < vectors_used)
+		return -EINVAL;
+
+	pf->sriov_base_vector = sriov_base_vector;
+
+	return 0;
+}
+
+/**
+ * ice_set_per_vf_res - check if vectors and queues are available
+ * @pf: pointer to the PF structure
+ *
+ * First, determine HW interrupts from common pool. If we allocate fewer VFs, we
+ * get more vectors and can enable more queues per VF. Note that this does not
+ * grab any vectors from the SW pool already allocated. Also note, that all
+ * vector counts include one for each VF's miscellaneous interrupt vector
+ * (i.e. OICR).
+ *
+ * Minimum VFs - 2 vectors, 1 queue pair
+ * Small VFs - 5 vectors, 4 queue pairs
+ * Medium VFs - 17 vectors, 16 queue pairs
+ *
+ * Second, determine number of queue pairs per VF by starting with a pre-defined
+ * maximum each VF supports. If this is not possible, then we adjust based on
+ * queue pairs available on the device.
+ *
+ * Lastly, set queue and MSI-X VF variables tracked by the PF so it can be used
+ * by each VF during VF initialization and reset.
+ */
+static int ice_set_per_vf_res(struct ice_pf *pf)
+{
+	int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker);
+	int msix_avail_per_vf, msix_avail_for_sriov;
+	struct device *dev = ice_pf_to_dev(pf);
+	u16 num_msix_per_vf, num_txq, num_rxq;
+
+	if (!pf->num_alloc_vfs || max_valid_res_idx < 0)
+		return -EINVAL;
+
+	/* determine MSI-X resources per VF */
+	msix_avail_for_sriov = pf->hw.func_caps.common_cap.num_msix_vectors -
+		pf->irq_tracker->num_entries;
+	msix_avail_per_vf = msix_avail_for_sriov / pf->num_alloc_vfs;
+	if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MED) {
+		num_msix_per_vf = ICE_NUM_VF_MSIX_MED;
+	} else if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_SMALL) {
+		num_msix_per_vf = ICE_NUM_VF_MSIX_SMALL;
+	} else if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MULTIQ_MIN) {
+		num_msix_per_vf = ICE_NUM_VF_MSIX_MULTIQ_MIN;
+	} else if (msix_avail_per_vf >= ICE_MIN_INTR_PER_VF) {
+		num_msix_per_vf = ICE_MIN_INTR_PER_VF;
+	} else {
+		dev_err(dev, "Only %d MSI-X interrupts available for SR-IOV. Not enough to support minimum of %d MSI-X interrupts per VF for %d VFs\n",
+			msix_avail_for_sriov, ICE_MIN_INTR_PER_VF,
+			pf->num_alloc_vfs);
+		return -EIO;
+	}
+
+	/* determine queue resources per VF */
+	num_txq = ice_determine_res(pf, ice_get_avail_txq_count(pf),
+				    min_t(u16,
+					  num_msix_per_vf - ICE_NONQ_VECS_VF,
+					  ICE_MAX_RSS_QS_PER_VF),
+				    ICE_MIN_QS_PER_VF);
+
+	num_rxq = ice_determine_res(pf, ice_get_avail_rxq_count(pf),
+				    min_t(u16,
+					  num_msix_per_vf - ICE_NONQ_VECS_VF,
+					  ICE_MAX_RSS_QS_PER_VF),
+				    ICE_MIN_QS_PER_VF);
+
+	if (!num_txq || !num_rxq) {
+		dev_err(dev, "Not enough queues to support minimum of %d queue pairs per VF for %d VFs\n",
+			ICE_MIN_QS_PER_VF, pf->num_alloc_vfs);
+		return -EIO;
+	}
+
+	if (ice_sriov_set_msix_res(pf, num_msix_per_vf * pf->num_alloc_vfs)) {
+		dev_err(dev, "Unable to set MSI-X resources for %d VFs\n",
+			pf->num_alloc_vfs);
+		return -EINVAL;
+	}
+
+	/* only allow equal Tx/Rx queue count (i.e. queue pairs) */
+	pf->num_qps_per_vf = min_t(int, num_txq, num_rxq);
+	pf->num_msix_per_vf = num_msix_per_vf;
+	dev_info(dev, "Enabling %d VFs with %d vectors and %d queues per VF\n",
+		 pf->num_alloc_vfs, pf->num_msix_per_vf, pf->num_qps_per_vf);
+
+	return 0;
+}
+
+/**
+ * ice_clear_vf_reset_trigger - enable VF to access hardware
+ * @vf: VF to enabled hardware access for
+ */
+static void ice_clear_vf_reset_trigger(struct ice_vf *vf)
+{
+	struct ice_hw *hw = &vf->pf->hw;
+	u32 reg;
+
+	reg = rd32(hw, VPGEN_VFRTRIG(vf->vf_id));
+	reg &= ~VPGEN_VFRTRIG_VFSWR_M;
+	wr32(hw, VPGEN_VFRTRIG(vf->vf_id), reg);
+	ice_flush(hw);
+}
+
+/**
+ * ice_vf_set_vsi_promisc - set given VF VSI to given promiscuous mode(s)
+ * @vf: pointer to the VF info
+ * @vsi: the VSI being configured
+ * @promisc_m: mask of promiscuous config bits
+ * @rm_promisc: promisc flag request from the VF to remove or add filter
+ *
+ * This function configures VF VSI promiscuous mode, based on the VF requests,
+ * for Unicast, Multicast and VLAN
+ */
+static enum ice_status
+ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m,
+		       bool rm_promisc)
+{
+	struct ice_pf *pf = vf->pf;
+	enum ice_status status = 0;
+	struct ice_hw *hw;
+
+	hw = &pf->hw;
+	if (vsi->num_vlan) {
+		status = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_m,
+						  rm_promisc);
+	} else if (vf->port_vlan_info) {
+		if (rm_promisc)
+			status = ice_clear_vsi_promisc(hw, vsi->idx, promisc_m,
+						       vf->port_vlan_info);
+		else
+			status = ice_set_vsi_promisc(hw, vsi->idx, promisc_m,
+						     vf->port_vlan_info);
+	} else {
+		if (rm_promisc)
+			status = ice_clear_vsi_promisc(hw, vsi->idx, promisc_m,
+						       0);
+		else
+			status = ice_set_vsi_promisc(hw, vsi->idx, promisc_m,
+						     0);
+	}
+
+	return status;
+}
+
+static void ice_vf_clear_counters(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+
+	vf->num_mac = 0;
+	vsi->num_vlan = 0;
+	memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events));
+	memset(&vf->mdd_rx_events, 0, sizeof(vf->mdd_rx_events));
+}
+
+/**
+ * ice_vf_pre_vsi_rebuild - tasks to be done prior to VSI rebuild
+ * @vf: VF to perform pre VSI rebuild tasks
+ *
+ * These tasks are items that don't need to be amortized since they are most
+ * likely called in a for loop with all VF(s) in the reset_all_vfs() case.
+ */
+static void ice_vf_pre_vsi_rebuild(struct ice_vf *vf)
+{
+	ice_vf_clear_counters(vf);
+	ice_clear_vf_reset_trigger(vf);
+}
+
+/**
+ * ice_vf_rebuild_host_cfg - host admin configuration is persistent across reset
+ * @vf: VF to rebuild host configuration on
+ */
+static void ice_vf_rebuild_host_cfg(struct ice_vf *vf)
+{
+	struct device *dev = ice_pf_to_dev(vf->pf);
+
+	ice_vf_set_host_trust_cfg(vf);
+
+	if (ice_vf_rebuild_host_mac_cfg(vf))
+		dev_err(dev, "failed to rebuild default MAC configuration for VF %d\n",
+			vf->vf_id);
+
+	if (ice_vf_rebuild_host_vlan_cfg(vf))
+		dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n",
+			vf->vf_id);
+}
+
+/**
+ * ice_vf_rebuild_vsi_with_release - release and setup the VF's VSI
+ * @vf: VF to release and setup the VSI for
+ *
+ * This is only called when a single VF is being reset (i.e. VFR, VFLR, host VF
+ * configuration change, etc.).
+ */
+static int ice_vf_rebuild_vsi_with_release(struct ice_vf *vf)
+{
+	ice_vf_vsi_release(vf);
+	if (!ice_vf_vsi_setup(vf))
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * ice_vf_rebuild_vsi - rebuild the VF's VSI
+ * @vf: VF to rebuild the VSI for
+ *
+ * This is only called when all VF(s) are being reset (i.e. PCIe Reset on the
+ * host, PFR, CORER, etc.).
+ */
+static int ice_vf_rebuild_vsi(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+
+	if (ice_vsi_rebuild(vsi, true)) {
+		dev_err(ice_pf_to_dev(pf), "failed to rebuild VF %d VSI\n",
+			vf->vf_id);
+		return -EIO;
+	}
+	/* vsi->idx will remain the same in this case so don't update
+	 * vf->lan_vsi_idx
+	 */
+	vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
+	vf->lan_vsi_num = vsi->vsi_num;
+
+	return 0;
+}
+
+/**
+ * ice_vf_set_initialized - VF is ready for VIRTCHNL communication
+ * @vf: VF to set in initialized state
+ *
+ * After this function the VF will be ready to receive/handle the
+ * VIRTCHNL_OP_GET_VF_RESOURCES message
+ */
+static void ice_vf_set_initialized(struct ice_vf *vf)
+{
+	ice_set_vf_state_qs_dis(vf);
+	clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
+	clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
+	clear_bit(ICE_VF_STATE_DIS, vf->vf_states);
+	set_bit(ICE_VF_STATE_INIT, vf->vf_states);
+}
+
+/**
+ * ice_vf_post_vsi_rebuild - tasks to do after the VF's VSI have been rebuilt
+ * @vf: VF to perform tasks on
+ */
+static void ice_vf_post_vsi_rebuild(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_hw *hw;
+
+	hw = &pf->hw;
+
+	ice_vf_rebuild_host_cfg(vf);
+
+	ice_vf_set_initialized(vf);
+	ice_ena_vf_mappings(vf);
+	wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE);
+}
+
+/**
+ * ice_reset_all_vfs - reset all allocated VFs in one go
+ * @pf: pointer to the PF structure
+ * @is_vflr: true if VFLR was issued, false if not
+ *
+ * First, tell the hardware to reset each VF, then do all the waiting in one
+ * chunk, and finally finish restoring each VF after the wait. This is useful
+ * during PF routines which need to reset all VFs, as otherwise it must perform
+ * these resets in a serialized fashion.
+ *
+ * Returns true if any VFs were reset, and false otherwise.
+ */
+bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	struct ice_vf *vf;
+	int v, i;
+
+	/* If we don't have any VFs, then there is nothing to reset */
+	if (!pf->num_alloc_vfs)
+		return false;
+
+	/* If VFs have been disabled, there is no need to reset */
+	if (test_and_set_bit(__ICE_VF_DIS, pf->state))
+		return false;
+
+	/* Begin reset on all VFs at once */
+	ice_for_each_vf(pf, v)
+		ice_trigger_vf_reset(&pf->vf[v], is_vflr, true);
+
+	/* HW requires some time to make sure it can flush the FIFO for a VF
+	 * when it resets it. Poll the VPGEN_VFRSTAT register for each VF in
+	 * sequence to make sure that it has completed. We'll keep track of
+	 * the VFs using a simple iterator that increments once that VF has
+	 * finished resetting.
+	 */
+	for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) {
+		/* Check each VF in sequence */
+		while (v < pf->num_alloc_vfs) {
+			u32 reg;
+
+			vf = &pf->vf[v];
+			reg = rd32(hw, VPGEN_VFRSTAT(vf->vf_id));
+			if (!(reg & VPGEN_VFRSTAT_VFRD_M)) {
+				/* only delay if the check failed */
+				usleep_range(10, 20);
+				break;
+			}
+
+			/* If the current VF has finished resetting, move on
+			 * to the next VF in sequence.
+			 */
+			v++;
+		}
+	}
+
+	/* Display a warning if at least one VF didn't manage to reset in
+	 * time, but continue on with the operation.
+	 */
+	if (v < pf->num_alloc_vfs)
+		dev_warn(dev, "VF reset check timeout\n");
+
+	/* free VF resources to begin resetting the VSI state */
+	ice_for_each_vf(pf, v) {
+		vf = &pf->vf[v];
+
+		mutex_lock(&vf->cfg_lock);
+
+		ice_vf_pre_vsi_rebuild(vf);
+		ice_vf_rebuild_vsi(vf);
+		ice_vf_post_vsi_rebuild(vf);
+
+		mutex_unlock(&vf->cfg_lock);
+	}
+
+	ice_flush(hw);
+	clear_bit(__ICE_VF_DIS, pf->state);
+
+	return true;
+}
+
+/**
+ * ice_is_vf_disabled
+ * @vf: pointer to the VF info
+ *
+ * Returns true if the PF or VF is disabled, false otherwise.
+ */
+static bool ice_is_vf_disabled(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+
+	/* If the PF has been disabled, there is no need resetting VF until
+	 * PF is active again. Similarly, if the VF has been disabled, this
+	 * means something else is resetting the VF, so we shouldn't continue.
+	 * Otherwise, set disable VF state bit for actual reset, and continue.
+	 */
+	return (test_bit(__ICE_VF_DIS, pf->state) ||
+		test_bit(ICE_VF_STATE_DIS, vf->vf_states));
+}
+
+/**
+ * ice_reset_vf - Reset a particular VF
+ * @vf: pointer to the VF structure
+ * @is_vflr: true if VFLR was issued, false if not
+ *
+ * Returns true if the VF is currently in reset, resets successfully, or resets
+ * are disabled and false otherwise.
+ */
+bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	struct device *dev;
+	struct ice_hw *hw;
+	bool rsd = false;
+	u8 promisc_m;
+	u32 reg;
+	int i;
+
+	lockdep_assert_held(&vf->cfg_lock);
+
+	dev = ice_pf_to_dev(pf);
+
+	if (test_bit(__ICE_VF_RESETS_DISABLED, pf->state)) {
+		dev_dbg(dev, "Trying to reset VF %d, but all VF resets are disabled\n",
+			vf->vf_id);
+		return true;
+	}
+
+	if (ice_is_vf_disabled(vf)) {
+		dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n",
+			vf->vf_id);
+		return true;
+	}
+
+	/* Set VF disable bit state here, before triggering reset */
+	set_bit(ICE_VF_STATE_DIS, vf->vf_states);
+	ice_trigger_vf_reset(vf, is_vflr, false);
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+
+	ice_dis_vf_qs(vf);
+
+	/* Call Disable LAN Tx queue AQ whether or not queues are
+	 * enabled. This is needed for successful completion of VFR.
+	 */
+	ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL,
+			NULL, ICE_VF_RESET, vf->vf_id, NULL);
+
+	hw = &pf->hw;
+	/* poll VPGEN_VFRSTAT reg to make sure
+	 * that reset is complete
+	 */
+	for (i = 0; i < 10; i++) {
+		/* VF reset requires driver to first reset the VF and then
+		 * poll the status register to make sure that the reset
+		 * completed successfully.
+		 */
+		reg = rd32(hw, VPGEN_VFRSTAT(vf->vf_id));
+		if (reg & VPGEN_VFRSTAT_VFRD_M) {
+			rsd = true;
+			break;
+		}
+
+		/* only sleep if the reset is not done */
+		usleep_range(10, 20);
+	}
+
+	/* Display a warning if VF didn't manage to reset in time, but need to
+	 * continue on with the operation.
+	 */
+	if (!rsd)
+		dev_warn(dev, "VF reset check timeout on VF %d\n", vf->vf_id);
+
+	/* disable promiscuous modes in case they were enabled
+	 * ignore any error if disabling process failed
+	 */
+	if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
+	    test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) {
+		if (vf->port_vlan_info || vsi->num_vlan)
+			promisc_m = ICE_UCAST_VLAN_PROMISC_BITS;
+		else
+			promisc_m = ICE_UCAST_PROMISC_BITS;
+
+		vsi = pf->vsi[vf->lan_vsi_idx];
+		if (ice_vf_set_vsi_promisc(vf, vsi, promisc_m, true))
+			dev_err(dev, "disabling promiscuous mode failed\n");
+	}
+
+	ice_vf_pre_vsi_rebuild(vf);
+
+	if (ice_vf_rebuild_vsi_with_release(vf)) {
+		dev_err(dev, "Failed to release and setup the VF%u's VSI\n", vf->vf_id);
+		return false;
+	}
+
+	ice_vf_post_vsi_rebuild(vf);
+
+	return true;
+}
+
+/**
+ * ice_vc_notify_link_state - Inform all VFs on a PF of link status
+ * @pf: pointer to the PF structure
+ */
+void ice_vc_notify_link_state(struct ice_pf *pf)
+{
+	int i;
+
+	ice_for_each_vf(pf, i)
+		ice_vc_notify_vf_link_state(&pf->vf[i]);
+}
+
+/**
+ * ice_vc_notify_reset - Send pending reset message to all VFs
+ * @pf: pointer to the PF structure
+ *
+ * indicate a pending reset to all VFs on a given PF
+ */
+void ice_vc_notify_reset(struct ice_pf *pf)
+{
+	struct virtchnl_pf_event pfe;
+
+	if (!pf->num_alloc_vfs)
+		return;
+
+	pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
+	pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM;
+	ice_vc_vf_broadcast(pf, VIRTCHNL_OP_EVENT, VIRTCHNL_STATUS_SUCCESS,
+			    (u8 *)&pfe, sizeof(struct virtchnl_pf_event));
+}
+
+/**
+ * ice_vc_notify_vf_reset - Notify VF of a reset event
+ * @vf: pointer to the VF structure
+ */
+static void ice_vc_notify_vf_reset(struct ice_vf *vf)
+{
+	struct virtchnl_pf_event pfe;
+	struct ice_pf *pf;
+
+	if (!vf)
+		return;
+
+	pf = vf->pf;
+	if (ice_validate_vf_id(pf, vf->vf_id))
+		return;
+
+	/* Bail out if VF is in disabled state, neither initialized, nor active
+	 * state - otherwise proceed with notifications
+	 */
+	if ((!test_bit(ICE_VF_STATE_INIT, vf->vf_states) &&
+	     !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) ||
+	    test_bit(ICE_VF_STATE_DIS, vf->vf_states))
+		return;
+
+	pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
+	pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM;
+	ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, VIRTCHNL_OP_EVENT,
+			      VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, sizeof(pfe),
+			      NULL);
+}
+
+/**
+ * ice_init_vf_vsi_res - initialize/setup VF VSI resources
+ * @vf: VF to initialize/setup the VSI for
+ *
+ * This function creates a VSI for the VF, adds a VLAN 0 filter, and sets up the
+ * VF VSI's broadcast filter and is only used during initial VF creation.
+ */
+static int ice_init_vf_vsi_res(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	u8 broadcast[ETH_ALEN];
+	enum ice_status status;
+	struct ice_vsi *vsi;
+	struct device *dev;
+	int err;
+
+	vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf);
+
+	dev = ice_pf_to_dev(pf);
+	vsi = ice_vf_vsi_setup(vf);
+	if (!vsi)
+		return -ENOMEM;
+
+	err = ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI);
+	if (err) {
+		dev_warn(dev, "Failed to add VLAN 0 filter for VF %d\n",
+			 vf->vf_id);
+		goto release_vsi;
+	}
+
+	eth_broadcast_addr(broadcast);
+	status = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI);
+	if (status) {
+		dev_err(dev, "Failed to add broadcast MAC filter for VF %d, status %s\n",
+			vf->vf_id, ice_stat_str(status));
+		err = ice_status_to_errno(status);
+		goto release_vsi;
+	}
+
+	vf->num_mac = 1;
+
+	return 0;
+
+release_vsi:
+	ice_vf_vsi_release(vf);
+	return err;
+}
+
+/**
+ * ice_start_vfs - start VFs so they are ready to be used by SR-IOV
+ * @pf: PF the VFs are associated with
+ */
+static int ice_start_vfs(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	int retval, i;
+
+	ice_for_each_vf(pf, i) {
+		struct ice_vf *vf = &pf->vf[i];
+
+		ice_clear_vf_reset_trigger(vf);
+
+		retval = ice_init_vf_vsi_res(vf);
+		if (retval) {
+			dev_err(ice_pf_to_dev(pf), "Failed to initialize VSI resources for VF %d, error %d\n",
+				vf->vf_id, retval);
+			goto teardown;
+		}
+
+		set_bit(ICE_VF_STATE_INIT, vf->vf_states);
+		ice_ena_vf_mappings(vf);
+		wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE);
+	}
+
+	ice_flush(hw);
+	return 0;
+
+teardown:
+	for (i = i - 1; i >= 0; i--) {
+		struct ice_vf *vf = &pf->vf[i];
+
+		ice_dis_vf_mappings(vf);
+		ice_vf_vsi_release(vf);
+	}
+
+	return retval;
+}
+
+/**
+ * ice_set_dflt_settings - set VF defaults during initialization/creation
+ * @pf: PF holding reference to all VFs for default configuration
+ */
+static void ice_set_dflt_settings_vfs(struct ice_pf *pf)
+{
+	int i;
+
+	ice_for_each_vf(pf, i) {
+		struct ice_vf *vf = &pf->vf[i];
+
+		vf->pf = pf;
+		vf->vf_id = i;
+		vf->vf_sw_id = pf->first_sw;
+		/* assign default capabilities */
+		set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vf->vf_caps);
+		vf->spoofchk = true;
+		vf->num_vf_qs = pf->num_qps_per_vf;
+
+		mutex_init(&vf->cfg_lock);
+	}
+}
+
+/**
+ * ice_alloc_vfs - allocate num_vfs in the PF structure
+ * @pf: PF to store the allocated VFs in
+ * @num_vfs: number of VFs to allocate
+ */
+static int ice_alloc_vfs(struct ice_pf *pf, int num_vfs)
+{
+	struct ice_vf *vfs;
+
+	vfs = devm_kcalloc(ice_pf_to_dev(pf), num_vfs, sizeof(*vfs),
+			   GFP_KERNEL);
+	if (!vfs)
+		return -ENOMEM;
+
+	pf->vf = vfs;
+	pf->num_alloc_vfs = num_vfs;
+
+	return 0;
+}
+
+/**
+ * ice_ena_vfs - enable VFs so they are ready to be used
+ * @pf: pointer to the PF structure
+ * @num_vfs: number of VFs to enable
+ */
+static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	int ret;
+
+	/* Disable global interrupt 0 so we don't try to handle the VFLR. */
+	wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
+	     ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S);
+	set_bit(__ICE_OICR_INTR_DIS, pf->state);
+	ice_flush(hw);
+
+	ret = pci_enable_sriov(pf->pdev, num_vfs);
+	if (ret) {
+		pf->num_alloc_vfs = 0;
+		goto err_unroll_intr;
+	}
+
+	ret = ice_alloc_vfs(pf, num_vfs);
+	if (ret)
+		goto err_pci_disable_sriov;
+
+	if (ice_set_per_vf_res(pf)) {
+		dev_err(dev, "Not enough resources for %d VFs, try with fewer number of VFs\n",
+			num_vfs);
+		ret = -ENOSPC;
+		goto err_unroll_sriov;
+	}
+
+	ice_set_dflt_settings_vfs(pf);
+
+	if (ice_start_vfs(pf)) {
+		dev_err(dev, "Failed to start VF(s)\n");
+		ret = -EAGAIN;
+		goto err_unroll_sriov;
+	}
+
+	clear_bit(__ICE_VF_DIS, pf->state);
+	return 0;
+
+err_unroll_sriov:
+	devm_kfree(dev, pf->vf);
+	pf->vf = NULL;
+	pf->num_alloc_vfs = 0;
+err_pci_disable_sriov:
+	pci_disable_sriov(pf->pdev);
+err_unroll_intr:
+	/* rearm interrupts here */
+	ice_irq_dynamic_ena(hw, NULL, NULL);
+	clear_bit(__ICE_OICR_INTR_DIS, pf->state);
+	return ret;
+}
+
+/**
+ * ice_pci_sriov_ena - Enable or change number of VFs
+ * @pf: pointer to the PF structure
+ * @num_vfs: number of VFs to allocate
+ *
+ * Returns 0 on success and negative on failure
+ */
+static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs)
+{
+	int pre_existing_vfs = pci_num_vf(pf->pdev);
+	struct device *dev = ice_pf_to_dev(pf);
+	int err;
+
+	if (pre_existing_vfs && pre_existing_vfs != num_vfs)
+		ice_free_vfs(pf);
+	else if (pre_existing_vfs && pre_existing_vfs == num_vfs)
+		return 0;
+
+	if (num_vfs > pf->num_vfs_supported) {
+		dev_err(dev, "Can't enable %d VFs, max VFs supported is %d\n",
+			num_vfs, pf->num_vfs_supported);
+		return -EOPNOTSUPP;
+	}
+
+	dev_info(dev, "Enabling %d VFs\n", num_vfs);
+	err = ice_ena_vfs(pf, num_vfs);
+	if (err) {
+		dev_err(dev, "Failed to enable SR-IOV: %d\n", err);
+		return err;
+	}
+
+	set_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
+	return 0;
+}
+
+/**
+ * ice_check_sriov_allowed - check if SR-IOV is allowed based on various checks
+ * @pf: PF to enabled SR-IOV on
+ */
+static int ice_check_sriov_allowed(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+
+	if (!test_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags)) {
+		dev_err(dev, "This device is not capable of SR-IOV\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (ice_is_safe_mode(pf)) {
+		dev_err(dev, "SR-IOV cannot be configured - Device is in Safe Mode\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (!ice_pf_state_is_nominal(pf)) {
+		dev_err(dev, "Cannot enable SR-IOV, device not ready\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_sriov_configure - Enable or change number of VFs via sysfs
+ * @pdev: pointer to a pci_dev structure
+ * @num_vfs: number of VFs to allocate or 0 to free VFs
+ *
+ * This function is called when the user updates the number of VFs in sysfs. On
+ * success return whatever num_vfs was set to by the caller. Return negative on
+ * failure.
+ */
+int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+	struct device *dev = ice_pf_to_dev(pf);
+	int err;
+
+	err = ice_check_sriov_allowed(pf);
+	if (err)
+		return err;
+
+	if (!num_vfs) {
+		if (!pci_vfs_assigned(pdev)) {
+			ice_free_vfs(pf);
+			return 0;
+		}
+
+		dev_err(dev, "can't free VFs because some are assigned to VMs.\n");
+		return -EBUSY;
+	}
+
+	err = ice_pci_sriov_ena(pf, num_vfs);
+	if (err)
+		return err;
+
+	return num_vfs;
+}
+
+/**
+ * ice_process_vflr_event - Free VF resources via IRQ calls
+ * @pf: pointer to the PF structure
+ *
+ * called from the VFLR IRQ handler to
+ * free up VF resources and state variables
+ */
+void ice_process_vflr_event(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	unsigned int vf_id;
+	u32 reg;
+
+	if (!test_and_clear_bit(__ICE_VFLR_EVENT_PENDING, pf->state) ||
+	    !pf->num_alloc_vfs)
+		return;
+
+	ice_for_each_vf(pf, vf_id) {
+		struct ice_vf *vf = &pf->vf[vf_id];
+		u32 reg_idx, bit_idx;
+
+		reg_idx = (hw->func_caps.vf_base_id + vf_id) / 32;
+		bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32;
+		/* read GLGEN_VFLRSTAT register to find out the flr VFs */
+		reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx));
+		if (reg & BIT(bit_idx)) {
+			/* GLGEN_VFLRSTAT bit will be cleared in ice_reset_vf */
+			mutex_lock(&vf->cfg_lock);
+			ice_reset_vf(vf, true);
+			mutex_unlock(&vf->cfg_lock);
+		}
+	}
+}
+
+/**
+ * ice_vc_reset_vf - Perform software reset on the VF after informing the AVF
+ * @vf: pointer to the VF info
+ */
+static void ice_vc_reset_vf(struct ice_vf *vf)
+{
+	ice_vc_notify_vf_reset(vf);
+	ice_reset_vf(vf, false);
+}
+
+/**
+ * ice_get_vf_from_pfq - get the VF who owns the PF space queue passed in
+ * @pf: PF used to index all VFs
+ * @pfq: queue index relative to the PF's function space
+ *
+ * If no VF is found who owns the pfq then return NULL, otherwise return a
+ * pointer to the VF who owns the pfq
+ */
+static struct ice_vf *ice_get_vf_from_pfq(struct ice_pf *pf, u16 pfq)
+{
+	unsigned int vf_id;
+
+	ice_for_each_vf(pf, vf_id) {
+		struct ice_vf *vf = &pf->vf[vf_id];
+		struct ice_vsi *vsi;
+		u16 rxq_idx;
+
+		vsi = pf->vsi[vf->lan_vsi_idx];
+
+		ice_for_each_rxq(vsi, rxq_idx)
+			if (vsi->rxq_map[rxq_idx] == pfq)
+				return vf;
+	}
+
+	return NULL;
+}
+
+/**
+ * ice_globalq_to_pfq - convert from global queue index to PF space queue index
+ * @pf: PF used for conversion
+ * @globalq: global queue index used to convert to PF space queue index
+ */
+static u32 ice_globalq_to_pfq(struct ice_pf *pf, u32 globalq)
+{
+	return globalq - pf->hw.func_caps.common_cap.rxq_first_id;
+}
+
+/**
+ * ice_vf_lan_overflow_event - handle LAN overflow event for a VF
+ * @pf: PF that the LAN overflow event happened on
+ * @event: structure holding the event information for the LAN overflow event
+ *
+ * Determine if the LAN overflow event was caused by a VF queue. If it was not
+ * caused by a VF, do nothing. If a VF caused this LAN overflow event trigger a
+ * reset on the offending VF.
+ */
+void
+ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event)
+{
+	u32 gldcb_rtctq, queue;
+	struct ice_vf *vf;
+
+	gldcb_rtctq = le32_to_cpu(event->desc.params.lan_overflow.prtdcb_ruptq);
+	dev_dbg(ice_pf_to_dev(pf), "GLDCB_RTCTQ: 0x%08x\n", gldcb_rtctq);
+
+	/* event returns device global Rx queue number */
+	queue = (gldcb_rtctq & GLDCB_RTCTQ_RXQNUM_M) >>
+		GLDCB_RTCTQ_RXQNUM_S;
+
+	vf = ice_get_vf_from_pfq(pf, ice_globalq_to_pfq(pf, queue));
+	if (!vf)
+		return;
+
+	mutex_lock(&vf->cfg_lock);
+	ice_vc_reset_vf(vf);
+	mutex_unlock(&vf->cfg_lock);
+}
+
+/**
+ * ice_vc_send_msg_to_vf - Send message to VF
+ * @vf: pointer to the VF info
+ * @v_opcode: virtual channel opcode
+ * @v_retval: virtual channel return value
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * send msg to VF
+ */
+static int
+ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
+		      enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
+{
+	enum ice_status aq_ret;
+	struct device *dev;
+	struct ice_pf *pf;
+
+	if (!vf)
+		return -EINVAL;
+
+	pf = vf->pf;
+	if (ice_validate_vf_id(pf, vf->vf_id))
+		return -EINVAL;
+
+	dev = ice_pf_to_dev(pf);
+
+	aq_ret = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval,
+				       msg, msglen, NULL);
+	if (aq_ret && pf->hw.mailboxq.sq_last_status != ICE_AQ_RC_ENOSYS) {
+		dev_info(dev, "Unable to send the message to VF %d ret %s aq_err %s\n",
+			 vf->vf_id, ice_stat_str(aq_ret),
+			 ice_aq_str(pf->hw.mailboxq.sq_last_status));
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_get_ver_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to request the API version used by the PF
+ */
+static int ice_vc_get_ver_msg(struct ice_vf *vf, u8 *msg)
+{
+	struct virtchnl_version_info info = {
+		VIRTCHNL_VERSION_MAJOR, VIRTCHNL_VERSION_MINOR
+	};
+
+	vf->vf_ver = *(struct virtchnl_version_info *)msg;
+	/* VFs running the 1.0 API expect to get 1.0 back or they will cry. */
+	if (VF_IS_V10(&vf->vf_ver))
+		info.minor = VIRTCHNL_VERSION_MINOR_NO_VF_CAPS;
+
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_VERSION,
+				     VIRTCHNL_STATUS_SUCCESS, (u8 *)&info,
+				     sizeof(struct virtchnl_version_info));
+}
+
+/**
+ * ice_vc_get_max_frame_size - get max frame size allowed for VF
+ * @vf: VF used to determine max frame size
+ *
+ * Max frame size is determined based on the current port's max frame size and
+ * whether a port VLAN is configured on this VF. The VF is not aware whether
+ * it's in a port VLAN so the PF needs to account for this in max frame size
+ * checks and sending the max frame size to the VF.
+ */
+static u16 ice_vc_get_max_frame_size(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+	struct ice_port_info *pi = vsi->port_info;
+	u16 max_frame_size;
+
+	max_frame_size = pi->phy.link_info.max_frame_size;
+
+	if (vf->port_vlan_info)
+		max_frame_size -= VLAN_HLEN;
+
+	return max_frame_size;
+}
+
+/**
+ * ice_vc_get_vf_res_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to request its resources
+ */
+static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_vf_resource *vfres = NULL;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	int len = 0;
+	int ret;
+
+	if (ice_check_vf_init(pf, vf)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	len = sizeof(struct virtchnl_vf_resource);
+
+	vfres = kzalloc(len, GFP_KERNEL);
+	if (!vfres) {
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		len = 0;
+		goto err;
+	}
+	if (VF_IS_V11(&vf->vf_ver))
+		vf->driver_caps = *(u32 *)msg;
+	else
+		vf->driver_caps = VIRTCHNL_VF_OFFLOAD_L2 |
+				  VIRTCHNL_VF_OFFLOAD_RSS_REG |
+				  VIRTCHNL_VF_OFFLOAD_VLAN;
+
+	vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2;
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	if (!vsi->info.pvid)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF;
+	} else {
+		if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ)
+			vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ;
+		else
+			vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG;
+	}
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_POLLING)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RX_POLLING;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_WB_ON_ITR;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
+
+	if (vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
+
+	vfres->num_vsis = 1;
+	/* Tx and Rx queue are equal for VF */
+	vfres->num_queue_pairs = vsi->num_txq;
+	vfres->max_vectors = pf->num_msix_per_vf;
+	vfres->rss_key_size = ICE_VSIQF_HKEY_ARRAY_SIZE;
+	vfres->rss_lut_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
+	vfres->max_mtu = ice_vc_get_max_frame_size(vf);
+
+	vfres->vsi_res[0].vsi_id = vf->lan_vsi_num;
+	vfres->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV;
+	vfres->vsi_res[0].num_queue_pairs = vsi->num_txq;
+	ether_addr_copy(vfres->vsi_res[0].default_mac_addr,
+			vf->dflt_lan_addr.addr);
+
+	/* match guest capabilities */
+	vf->driver_caps = vfres->vf_cap_flags;
+
+	set_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
+
+err:
+	/* send the response back to the VF */
+	ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_VF_RESOURCES, v_ret,
+				    (u8 *)vfres, len);
+
+	kfree(vfres);
+	return ret;
+}
+
+/**
+ * ice_vc_reset_vf_msg
+ * @vf: pointer to the VF info
+ *
+ * called from the VF to reset itself,
+ * unlike other virtchnl messages, PF driver
+ * doesn't send the response back to the VF
+ */
+static void ice_vc_reset_vf_msg(struct ice_vf *vf)
+{
+	if (test_bit(ICE_VF_STATE_INIT, vf->vf_states))
+		ice_reset_vf(vf, false);
+}
+
+/**
+ * ice_find_vsi_from_id
+ * @pf: the PF structure to search for the VSI
+ * @id: ID of the VSI it is searching for
+ *
+ * searches for the VSI with the given ID
+ */
+static struct ice_vsi *ice_find_vsi_from_id(struct ice_pf *pf, u16 id)
+{
+	int i;
+
+	ice_for_each_vsi(pf, i)
+		if (pf->vsi[i] && pf->vsi[i]->vsi_num == id)
+			return pf->vsi[i];
+
+	return NULL;
+}
+
+/**
+ * ice_vc_isvalid_vsi_id
+ * @vf: pointer to the VF info
+ * @vsi_id: VF relative VSI ID
+ *
+ * check for the valid VSI ID
+ */
+static bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	vsi = ice_find_vsi_from_id(pf, vsi_id);
+
+	return (vsi && (vsi->vf_id == vf->vf_id));
+}
+
+/**
+ * ice_vc_isvalid_q_id
+ * @vf: pointer to the VF info
+ * @vsi_id: VSI ID
+ * @qid: VSI relative queue ID
+ *
+ * check for the valid queue ID
+ */
+static bool ice_vc_isvalid_q_id(struct ice_vf *vf, u16 vsi_id, u8 qid)
+{
+	struct ice_vsi *vsi = ice_find_vsi_from_id(vf->pf, vsi_id);
+	/* allocated Tx and Rx queues should be always equal for VF VSI */
+	return (vsi && (qid < vsi->alloc_txq));
+}
+
+/**
+ * ice_vc_isvalid_ring_len
+ * @ring_len: length of ring
+ *
+ * check for the valid ring count, should be multiple of ICE_REQ_DESC_MULTIPLE
+ * or zero
+ */
+static bool ice_vc_isvalid_ring_len(u16 ring_len)
+{
+	return ring_len == 0 ||
+	       (ring_len >= ICE_MIN_NUM_DESC &&
+		ring_len <= ICE_MAX_NUM_DESC &&
+		!(ring_len % ICE_REQ_DESC_MULTIPLE));
+}
+
+/**
+ * ice_vc_config_rss_key
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Configure the VF's RSS key
+ */
+static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_rss_key *vrk =
+		(struct virtchnl_rss_key *)msg;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vrk->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (vrk->key_len != ICE_VSIQF_HKEY_ARRAY_SIZE) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (ice_set_rss(vsi, vrk->key, NULL, 0))
+		v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+error_param:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_vc_config_rss_lut
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Configure the VF's RSS LUT
+ */
+static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg)
+{
+	struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg;
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vrl->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (vrl->lut_entries != ICE_VSIQF_HLUT_ARRAY_SIZE) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (ice_set_rss(vsi, NULL, vrl->lut, ICE_VSIQF_HLUT_ARRAY_SIZE))
+		v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+error_param:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_wait_on_vf_reset - poll to make sure a given VF is ready after reset
+ * @vf: The VF being resseting
+ *
+ * The max poll time is about ~800ms, which is about the maximum time it takes
+ * for a VF to be reset and/or a VF driver to be removed.
+ */
+static void ice_wait_on_vf_reset(struct ice_vf *vf)
+{
+	int i;
+
+	for (i = 0; i < ICE_MAX_VF_RESET_TRIES; i++) {
+		if (test_bit(ICE_VF_STATE_INIT, vf->vf_states))
+			break;
+		msleep(ICE_MAX_VF_RESET_SLEEP_MS);
+	}
+}
+
+/**
+ * ice_check_vf_ready_for_cfg - check if VF is ready to be configured/queried
+ * @vf: VF to check if it's ready to be configured/queried
+ *
+ * The purpose of this function is to make sure the VF is not in reset, not
+ * disabled, and initialized so it can be configured and/or queried by a host
+ * administrator.
+ */
+static int ice_check_vf_ready_for_cfg(struct ice_vf *vf)
+{
+	struct ice_pf *pf;
+
+	ice_wait_on_vf_reset(vf);
+
+	if (ice_is_vf_disabled(vf))
+		return -EINVAL;
+
+	pf = vf->pf;
+	if (ice_check_vf_init(pf, vf))
+		return -EBUSY;
+
+	return 0;
+}
+
+/**
+ * ice_set_vf_spoofchk
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @ena: flag to enable or disable feature
+ *
+ * Enable or disable VF spoof checking
+ */
+int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+	struct ice_vsi_ctx *ctx;
+	struct ice_vsi *vf_vsi;
+	enum ice_status status;
+	struct device *dev;
+	struct ice_vf *vf;
+	int ret;
+
+	dev = ice_pf_to_dev(pf);
+	if (ice_validate_vf_id(pf, vf_id))
+		return -EINVAL;
+
+	vf = &pf->vf[vf_id];
+	ret = ice_check_vf_ready_for_cfg(vf);
+	if (ret)
+		return ret;
+
+	vf_vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vf_vsi) {
+		netdev_err(netdev, "VSI %d for VF %d is null\n",
+			   vf->lan_vsi_idx, vf->vf_id);
+		return -EINVAL;
+	}
+
+	if (vf_vsi->type != ICE_VSI_VF) {
+		netdev_err(netdev, "Type %d of VSI %d for VF %d is no ICE_VSI_VF\n",
+			   vf_vsi->type, vf_vsi->vsi_num, vf->vf_id);
+		return -ENODEV;
+	}
+
+	if (ena == vf->spoofchk) {
+		dev_dbg(dev, "VF spoofchk already %s\n", ena ? "ON" : "OFF");
+		return 0;
+	}
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->info.sec_flags = vf_vsi->info.sec_flags;
+	ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
+	if (ena) {
+		ctx->info.sec_flags |=
+			ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF |
+			(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+			 ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+	} else {
+		ctx->info.sec_flags &=
+			~(ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF |
+			  (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+			   ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S));
+	}
+
+	status = ice_update_vsi(&pf->hw, vf_vsi->idx, ctx, NULL);
+	if (status) {
+		dev_err(dev, "Failed to %sable spoofchk on VF %d VSI %d\n error %s\n",
+			ena ? "en" : "dis", vf->vf_id, vf_vsi->vsi_num,
+			ice_stat_str(status));
+		ret = -EIO;
+		goto out;
+	}
+
+	/* only update spoofchk state and VSI context on success */
+	vf_vsi->info.sec_flags = ctx->info.sec_flags;
+	vf->spoofchk = ena;
+
+out:
+	kfree(ctx);
+	return ret;
+}
+
+/**
+ * ice_is_any_vf_in_promisc - check if any VF(s) are in promiscuous mode
+ * @pf: PF structure for accessing VF(s)
+ *
+ * Return false if no VF(s) are in unicast and/or multicast promiscuous mode,
+ * else return true
+ */
+bool ice_is_any_vf_in_promisc(struct ice_pf *pf)
+{
+	int vf_idx;
+
+	ice_for_each_vf(pf, vf_idx) {
+		struct ice_vf *vf = &pf->vf[vf_idx];
+
+		/* found a VF that has promiscuous mode configured */
+		if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
+		    test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states))
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_vc_cfg_promiscuous_mode_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to configure VF VSIs promiscuous mode
+ */
+static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_promisc_info *info =
+	    (struct virtchnl_promisc_info *)msg;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	struct device *dev;
+	bool rm_promisc;
+	int ret = 0;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, info->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	dev = ice_pf_to_dev(pf);
+	if (!test_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
+		dev_err(dev, "Unprivileged VF %d is attempting to configure promiscuous mode\n",
+			vf->vf_id);
+		/* Leave v_ret alone, lie to the VF on purpose. */
+		goto error_param;
+	}
+
+	rm_promisc = !(info->flags & FLAG_VF_UNICAST_PROMISC) &&
+		!(info->flags & FLAG_VF_MULTICAST_PROMISC);
+
+	if (vsi->num_vlan || vf->port_vlan_info) {
+		struct ice_vsi *pf_vsi = ice_get_main_vsi(pf);
+		struct net_device *pf_netdev;
+
+		if (!pf_vsi) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		pf_netdev = pf_vsi->netdev;
+
+		ret = ice_set_vf_spoofchk(pf_netdev, vf->vf_id, rm_promisc);
+		if (ret) {
+			dev_err(dev, "Failed to update spoofchk to %s for VF %d VSI %d when setting promiscuous mode\n",
+				rm_promisc ? "ON" : "OFF", vf->vf_id,
+				vsi->vsi_num);
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		}
+
+		ret = ice_cfg_vlan_pruning(vsi, true, !rm_promisc);
+		if (ret) {
+			dev_err(dev, "Failed to configure VLAN pruning in promiscuous mode\n");
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+	}
+
+	if (!test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags)) {
+		bool set_dflt_vsi = !!(info->flags & FLAG_VF_UNICAST_PROMISC);
+
+		if (set_dflt_vsi && !ice_is_dflt_vsi_in_use(pf->first_sw))
+			/* only attempt to set the default forwarding VSI if
+			 * it's not currently set
+			 */
+			ret = ice_set_dflt_vsi(pf->first_sw, vsi);
+		else if (!set_dflt_vsi &&
+			 ice_is_vsi_dflt_vsi(pf->first_sw, vsi))
+			/* only attempt to free the default forwarding VSI if we
+			 * are the owner
+			 */
+			ret = ice_clear_dflt_vsi(pf->first_sw);
+
+		if (ret) {
+			dev_err(dev, "%sable VF %d as the default VSI failed, error %d\n",
+				set_dflt_vsi ? "en" : "dis", vf->vf_id, ret);
+			v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+			goto error_param;
+		}
+	} else {
+		enum ice_status status;
+		u8 promisc_m;
+
+		if (info->flags & FLAG_VF_UNICAST_PROMISC) {
+			if (vf->port_vlan_info || vsi->num_vlan)
+				promisc_m = ICE_UCAST_VLAN_PROMISC_BITS;
+			else
+				promisc_m = ICE_UCAST_PROMISC_BITS;
+		} else if (info->flags & FLAG_VF_MULTICAST_PROMISC) {
+			if (vf->port_vlan_info || vsi->num_vlan)
+				promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
+			else
+				promisc_m = ICE_MCAST_PROMISC_BITS;
+		} else {
+			if (vf->port_vlan_info || vsi->num_vlan)
+				promisc_m = ICE_UCAST_VLAN_PROMISC_BITS;
+			else
+				promisc_m = ICE_UCAST_PROMISC_BITS;
+		}
+
+		/* Configure multicast/unicast with or without VLAN promiscuous
+		 * mode
+		 */
+		status = ice_vf_set_vsi_promisc(vf, vsi, promisc_m, rm_promisc);
+		if (status) {
+			dev_err(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d failed, error: %s\n",
+				rm_promisc ? "dis" : "en", vf->vf_id,
+				ice_stat_str(status));
+			v_ret = ice_err_to_virt_err(status);
+			goto error_param;
+		} else {
+			dev_dbg(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d succeeded\n",
+				rm_promisc ? "dis" : "en", vf->vf_id);
+		}
+	}
+
+	if (info->flags & FLAG_VF_MULTICAST_PROMISC)
+		set_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
+	else
+		clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
+
+	if (info->flags & FLAG_VF_UNICAST_PROMISC)
+		set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
+	else
+		clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
+
+error_param:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+				     v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_get_stats_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to get VSI stats
+ */
+static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_queue_select *vqs =
+		(struct virtchnl_queue_select *)msg;
+	struct ice_eth_stats stats = { 0 };
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	ice_update_eth_stats(vsi);
+
+	stats = vsi->eth_stats;
+
+error_param:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_STATS, v_ret,
+				     (u8 *)&stats, sizeof(stats));
+}
+
+/**
+ * ice_vc_validate_vqs_bitmaps - validate Rx/Tx queue bitmaps from VIRTCHNL
+ * @vqs: virtchnl_queue_select structure containing bitmaps to validate
+ *
+ * Return true on successful validation, else false
+ */
+static bool ice_vc_validate_vqs_bitmaps(struct virtchnl_queue_select *vqs)
+{
+	if ((!vqs->rx_queues && !vqs->tx_queues) ||
+	    vqs->rx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF) ||
+	    vqs->tx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_vf_ena_txq_interrupt - enable Tx queue interrupt via QINT_TQCTL
+ * @vsi: VSI of the VF to configure
+ * @q_idx: VF queue index used to determine the queue in the PF's space
+ */
+static void ice_vf_ena_txq_interrupt(struct ice_vsi *vsi, u32 q_idx)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	u32 pfq = vsi->txq_map[q_idx];
+	u32 reg;
+
+	reg = rd32(hw, QINT_TQCTL(pfq));
+
+	/* MSI-X index 0 in the VF's space is always for the OICR, which means
+	 * this is most likely a poll mode VF driver, so don't enable an
+	 * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP
+	 */
+	if (!(reg & QINT_TQCTL_MSIX_INDX_M))
+		return;
+
+	wr32(hw, QINT_TQCTL(pfq), reg | QINT_TQCTL_CAUSE_ENA_M);
+}
+
+/**
+ * ice_vf_ena_rxq_interrupt - enable Tx queue interrupt via QINT_RQCTL
+ * @vsi: VSI of the VF to configure
+ * @q_idx: VF queue index used to determine the queue in the PF's space
+ */
+static void ice_vf_ena_rxq_interrupt(struct ice_vsi *vsi, u32 q_idx)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	u32 pfq = vsi->rxq_map[q_idx];
+	u32 reg;
+
+	reg = rd32(hw, QINT_RQCTL(pfq));
+
+	/* MSI-X index 0 in the VF's space is always for the OICR, which means
+	 * this is most likely a poll mode VF driver, so don't enable an
+	 * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP
+	 */
+	if (!(reg & QINT_RQCTL_MSIX_INDX_M))
+		return;
+
+	wr32(hw, QINT_RQCTL(pfq), reg | QINT_RQCTL_CAUSE_ENA_M);
+}
+
+/**
+ * ice_vc_ena_qs_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to enable all or specific queue(s)
+ */
+static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_queue_select *vqs =
+	    (struct virtchnl_queue_select *)msg;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	unsigned long q_map;
+	u16 vf_q_id;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_validate_vqs_bitmaps(vqs)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	/* Enable only Rx rings, Tx rings were enabled by the FW when the
+	 * Tx queue group list was configured and the context bits were
+	 * programmed using ice_vsi_cfg_txqs
+	 */
+	q_map = vqs->rx_queues;
+	for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+		if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		/* Skip queue if enabled */
+		if (test_bit(vf_q_id, vf->rxq_ena))
+			continue;
+
+		if (ice_vsi_ctrl_one_rx_ring(vsi, true, vf_q_id, true)) {
+			dev_err(ice_pf_to_dev(vsi->back), "Failed to enable Rx ring %d on VSI %d\n",
+				vf_q_id, vsi->vsi_num);
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		ice_vf_ena_rxq_interrupt(vsi, vf_q_id);
+		set_bit(vf_q_id, vf->rxq_ena);
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	q_map = vqs->tx_queues;
+	for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+		if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		/* Skip queue if enabled */
+		if (test_bit(vf_q_id, vf->txq_ena))
+			continue;
+
+		ice_vf_ena_txq_interrupt(vsi, vf_q_id);
+		set_bit(vf_q_id, vf->txq_ena);
+	}
+
+	/* Set flag to indicate that queues are enabled */
+	if (v_ret == VIRTCHNL_STATUS_SUCCESS)
+		set_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
+
+error_param:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_vc_dis_qs_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to disable all or specific
+ * queue(s)
+ */
+static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_queue_select *vqs =
+	    (struct virtchnl_queue_select *)msg;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	unsigned long q_map;
+	u16 vf_q_id;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) &&
+	    !test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_validate_vqs_bitmaps(vqs)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (vqs->tx_queues) {
+		q_map = vqs->tx_queues;
+
+		for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+			struct ice_ring *ring = vsi->tx_rings[vf_q_id];
+			struct ice_txq_meta txq_meta = { 0 };
+
+			if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			if (!test_bit(vf_q_id, vf->txq_ena))
+				dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n",
+					vf_q_id, vsi->vsi_num);
+
+			ice_fill_txq_meta(vsi, ring, &txq_meta);
+
+			if (ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id,
+						 ring, &txq_meta)) {
+				dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n",
+					vf_q_id, vsi->vsi_num);
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			/* Clear enabled queues flag */
+			clear_bit(vf_q_id, vf->txq_ena);
+		}
+	}
+
+	q_map = vqs->rx_queues;
+	/* speed up Rx queue disable by batching them if possible */
+	if (q_map &&
+	    bitmap_equal(&q_map, vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF)) {
+		if (ice_vsi_stop_all_rx_rings(vsi)) {
+			dev_err(ice_pf_to_dev(vsi->back), "Failed to stop all Rx rings on VSI %d\n",
+				vsi->vsi_num);
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF);
+	} else if (q_map) {
+		for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+			if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			/* Skip queue if not enabled */
+			if (!test_bit(vf_q_id, vf->rxq_ena))
+				continue;
+
+			if (ice_vsi_ctrl_one_rx_ring(vsi, false, vf_q_id,
+						     true)) {
+				dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Rx ring %d on VSI %d\n",
+					vf_q_id, vsi->vsi_num);
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			/* Clear enabled queues flag */
+			clear_bit(vf_q_id, vf->rxq_ena);
+		}
+	}
+
+	/* Clear enabled queues flag */
+	if (v_ret == VIRTCHNL_STATUS_SUCCESS && ice_vf_has_no_qs_ena(vf))
+		clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
+
+error_param:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_cfg_interrupt
+ * @vf: pointer to the VF info
+ * @vsi: the VSI being configured
+ * @vector_id: vector ID
+ * @map: vector map for mapping vectors to queues
+ * @q_vector: structure for interrupt vector
+ * configure the IRQ to queue map
+ */
+static int
+ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id,
+		  struct virtchnl_vector_map *map,
+		  struct ice_q_vector *q_vector)
+{
+	u16 vsi_q_id, vsi_q_id_idx;
+	unsigned long qmap;
+
+	q_vector->num_ring_rx = 0;
+	q_vector->num_ring_tx = 0;
+
+	qmap = map->rxq_map;
+	for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) {
+		vsi_q_id = vsi_q_id_idx;
+
+		if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id))
+			return VIRTCHNL_STATUS_ERR_PARAM;
+
+		q_vector->num_ring_rx++;
+		q_vector->rx.itr_idx = map->rxitr_idx;
+		vsi->rx_rings[vsi_q_id]->q_vector = q_vector;
+		ice_cfg_rxq_interrupt(vsi, vsi_q_id, vector_id,
+				      q_vector->rx.itr_idx);
+	}
+
+	qmap = map->txq_map;
+	for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) {
+		vsi_q_id = vsi_q_id_idx;
+
+		if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id))
+			return VIRTCHNL_STATUS_ERR_PARAM;
+
+		q_vector->num_ring_tx++;
+		q_vector->tx.itr_idx = map->txitr_idx;
+		vsi->tx_rings[vsi_q_id]->q_vector = q_vector;
+		ice_cfg_txq_interrupt(vsi, vsi_q_id, vector_id,
+				      q_vector->tx.itr_idx);
+	}
+
+	return VIRTCHNL_STATUS_SUCCESS;
+}
+
+/**
+ * ice_vc_cfg_irq_map_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to configure the IRQ to queue map
+ */
+static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	u16 num_q_vectors_mapped, vsi_id, vector_id;
+	struct virtchnl_irq_map_info *irqmap_info;
+	struct virtchnl_vector_map *map;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	int i;
+
+	irqmap_info = (struct virtchnl_irq_map_info *)msg;
+	num_q_vectors_mapped = irqmap_info->num_vectors;
+
+	/* Check to make sure number of VF vectors mapped is not greater than
+	 * number of VF vectors originally allocated, and check that
+	 * there is actually at least a single VF queue vector mapped
+	 */
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
+	    pf->num_msix_per_vf < num_q_vectors_mapped ||
+	    !num_q_vectors_mapped) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	for (i = 0; i < num_q_vectors_mapped; i++) {
+		struct ice_q_vector *q_vector;
+
+		map = &irqmap_info->vecmap[i];
+
+		vector_id = map->vector_id;
+		vsi_id = map->vsi_id;
+		/* vector_id is always 0-based for each VF, and can never be
+		 * larger than or equal to the max allowed interrupts per VF
+		 */
+		if (!(vector_id < pf->num_msix_per_vf) ||
+		    !ice_vc_isvalid_vsi_id(vf, vsi_id) ||
+		    (!vector_id && (map->rxq_map || map->txq_map))) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		/* No need to map VF miscellaneous or rogue vector */
+		if (!vector_id)
+			continue;
+
+		/* Subtract non queue vector from vector_id passed by VF
+		 * to get actual number of VSI queue vector array index
+		 */
+		q_vector = vsi->q_vectors[vector_id - ICE_NONQ_VECS_VF];
+		if (!q_vector) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		/* lookout for the invalid queue index */
+		v_ret = (enum virtchnl_status_code)
+			ice_cfg_interrupt(vf, vsi, vector_id, map, q_vector);
+		if (v_ret)
+			goto error_param;
+	}
+
+error_param:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_vc_cfg_qs_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to configure the Rx/Tx queues
+ */
+static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_vsi_queue_config_info *qci =
+	    (struct virtchnl_vsi_queue_config_info *)msg;
+	struct virtchnl_queue_pair_info *qpi;
+	u16 num_rxq = 0, num_txq = 0;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	int i;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (qci->num_queue_pairs > ICE_MAX_RSS_QS_PER_VF ||
+	    qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
+		dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n",
+			vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	for (i = 0; i < qci->num_queue_pairs; i++) {
+		qpi = &qci->qpair[i];
+		if (qpi->txq.vsi_id != qci->vsi_id ||
+		    qpi->rxq.vsi_id != qci->vsi_id ||
+		    qpi->rxq.queue_id != qpi->txq.queue_id ||
+		    qpi->txq.headwb_enabled ||
+		    !ice_vc_isvalid_ring_len(qpi->txq.ring_len) ||
+		    !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) ||
+		    !ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+		/* copy Tx queue info from VF into VSI */
+		if (qpi->txq.ring_len > 0) {
+			num_txq++;
+			vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr;
+			vsi->tx_rings[i]->count = qpi->txq.ring_len;
+		}
+
+		/* copy Rx queue info from VF into VSI */
+		if (qpi->rxq.ring_len > 0) {
+			u16 max_frame_size = ice_vc_get_max_frame_size(vf);
+
+			num_rxq++;
+			vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr;
+			vsi->rx_rings[i]->count = qpi->rxq.ring_len;
+
+			if (qpi->rxq.databuffer_size != 0 &&
+			    (qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
+			     qpi->rxq.databuffer_size < 1024)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+			vsi->rx_buf_len = qpi->rxq.databuffer_size;
+			vsi->rx_rings[i]->rx_buf_len = vsi->rx_buf_len;
+			if (qpi->rxq.max_pkt_size > max_frame_size ||
+			    qpi->rxq.max_pkt_size < 64) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+		}
+
+		vsi->max_frame = qpi->rxq.max_pkt_size;
+		/* add space for the port VLAN since the VF driver is not
+		 * expected to account for it in the MTU calculation
+		 */
+		if (vf->port_vlan_info)
+			vsi->max_frame += VLAN_HLEN;
+	}
+
+	/* VF can request to configure less than allocated queues or default
+	 * allocated queues. So update the VSI with new number
+	 */
+	vsi->num_txq = num_txq;
+	vsi->num_rxq = num_rxq;
+	/* All queues of VF VSI are in TC 0 */
+	vsi->tc_cfg.tc_info[0].qcount_tx = num_txq;
+	vsi->tc_cfg.tc_info[0].qcount_rx = num_rxq;
+
+	if (ice_vsi_cfg_lan_txqs(vsi) || ice_vsi_cfg_rxqs(vsi))
+		v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+
+error_param:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_is_vf_trusted
+ * @vf: pointer to the VF info
+ */
+static bool ice_is_vf_trusted(struct ice_vf *vf)
+{
+	return test_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
+}
+
+/**
+ * ice_can_vf_change_mac
+ * @vf: pointer to the VF info
+ *
+ * Return true if the VF is allowed to change its MAC filters, false otherwise
+ */
+static bool ice_can_vf_change_mac(struct ice_vf *vf)
+{
+	/* If the VF MAC address has been set administratively (via the
+	 * ndo_set_vf_mac command), then deny permission to the VF to
+	 * add/delete unicast MAC addresses, unless the VF is trusted
+	 */
+	if (vf->pf_set_mac && !ice_is_vf_trusted(vf))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_vc_add_mac_addr - attempt to add the MAC address passed in
+ * @vf: pointer to the VF info
+ * @vsi: pointer to the VF's VSI
+ * @mac_addr: MAC address to add
+ */
+static int
+ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr)
+{
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	enum ice_status status;
+	int ret = 0;
+
+	/* default unicast MAC already added */
+	if (ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr))
+		return 0;
+
+	if (is_unicast_ether_addr(mac_addr) && !ice_can_vf_change_mac(vf)) {
+		dev_err(dev, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
+		return -EPERM;
+	}
+
+	status = ice_fltr_add_mac(vsi, mac_addr, ICE_FWD_TO_VSI);
+	if (status == ICE_ERR_ALREADY_EXISTS) {
+		dev_dbg(dev, "MAC %pM already exists for VF %d\n", mac_addr,
+			vf->vf_id);
+		/* don't return since we might need to update
+		 * the primary MAC in ice_vfhw_mac_add() below
+		 */
+		ret = -EEXIST;
+	} else if (status) {
+		dev_err(dev, "Failed to add MAC %pM for VF %d\n, error %s\n",
+			mac_addr, vf->vf_id, ice_stat_str(status));
+		return -EIO;
+	} else {
+		vf->num_mac++;
+	}
+
+	/* Set the default LAN address to the latest unicast MAC address added
+	 * by the VF. The default LAN address is reported by the PF via
+	 * ndo_get_vf_config.
+	 */
+	if (is_unicast_ether_addr(mac_addr))
+		ether_addr_copy(vf->dflt_lan_addr.addr, mac_addr);
+
+	return ret;
+}
+
+/**
+ * ice_vc_del_mac_addr - attempt to delete the MAC address passed in
+ * @vf: pointer to the VF info
+ * @vsi: pointer to the VF's VSI
+ * @mac_addr: MAC address to delete
+ */
+static int
+ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr)
+{
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	enum ice_status status;
+
+	if (!ice_can_vf_change_mac(vf) &&
+	    ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr))
+		return 0;
+
+	status = ice_fltr_remove_mac(vsi, mac_addr, ICE_FWD_TO_VSI);
+	if (status == ICE_ERR_DOES_NOT_EXIST) {
+		dev_err(dev, "MAC %pM does not exist for VF %d\n", mac_addr,
+			vf->vf_id);
+		return -ENOENT;
+	} else if (status) {
+		dev_err(dev, "Failed to delete MAC %pM for VF %d, error %s\n",
+			mac_addr, vf->vf_id, ice_stat_str(status));
+		return -EIO;
+	}
+
+	if (ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr))
+		eth_zero_addr(vf->dflt_lan_addr.addr);
+
+	vf->num_mac--;
+
+	return 0;
+}
+
+/**
+ * ice_vc_handle_mac_addr_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @set: true if MAC filters are being set, false otherwise
+ *
+ * add guest MAC address filter
+ */
+static int
+ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
+{
+	int (*ice_vc_cfg_mac)
+		(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr);
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_ether_addr_list *al =
+	    (struct virtchnl_ether_addr_list *)msg;
+	struct ice_pf *pf = vf->pf;
+	enum virtchnl_ops vc_op;
+	struct ice_vsi *vsi;
+	int i;
+
+	if (set) {
+		vc_op = VIRTCHNL_OP_ADD_ETH_ADDR;
+		ice_vc_cfg_mac = ice_vc_add_mac_addr;
+	} else {
+		vc_op = VIRTCHNL_OP_DEL_ETH_ADDR;
+		ice_vc_cfg_mac = ice_vc_del_mac_addr;
+	}
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
+	    !ice_vc_isvalid_vsi_id(vf, al->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto handle_mac_exit;
+	}
+
+	/* If this VF is not privileged, then we can't add more than a
+	 * limited number of addresses. Check to make sure that the
+	 * additions do not push us over the limit.
+	 */
+	if (set && !ice_is_vf_trusted(vf) &&
+	    (vf->num_mac + al->num_elements) > ICE_MAX_MACADDR_PER_VF) {
+		dev_err(ice_pf_to_dev(pf), "Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n",
+			vf->vf_id);
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto handle_mac_exit;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto handle_mac_exit;
+	}
+
+	for (i = 0; i < al->num_elements; i++) {
+		u8 *mac_addr = al->list[i].addr;
+		int result;
+
+		if (is_broadcast_ether_addr(mac_addr) ||
+		    is_zero_ether_addr(mac_addr))
+			continue;
+
+		result = ice_vc_cfg_mac(vf, vsi, mac_addr);
+		if (result == -EEXIST || result == -ENOENT) {
+			continue;
+		} else if (result) {
+			v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+			goto handle_mac_exit;
+		}
+	}
+
+handle_mac_exit:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, vc_op, v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_add_mac_addr_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * add guest MAC address filter
+ */
+static int ice_vc_add_mac_addr_msg(struct ice_vf *vf, u8 *msg)
+{
+	return ice_vc_handle_mac_addr_msg(vf, msg, true);
+}
+
+/**
+ * ice_vc_del_mac_addr_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * remove guest MAC address filter
+ */
+static int ice_vc_del_mac_addr_msg(struct ice_vf *vf, u8 *msg)
+{
+	return ice_vc_handle_mac_addr_msg(vf, msg, false);
+}
+
+/**
+ * ice_vc_request_qs_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * VFs get a default number of queues but can use this message to request a
+ * different number. If the request is successful, PF will reset the VF and
+ * return 0. If unsuccessful, PF will send message informing VF of number of
+ * available queue pairs via virtchnl message response to VF.
+ */
+static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_vf_res_request *vfres =
+		(struct virtchnl_vf_res_request *)msg;
+	u16 req_queues = vfres->num_queue_pairs;
+	struct ice_pf *pf = vf->pf;
+	u16 max_allowed_vf_queues;
+	u16 tx_rx_queue_left;
+	struct device *dev;
+	u16 cur_queues;
+
+	dev = ice_pf_to_dev(pf);
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	cur_queues = vf->num_vf_qs;
+	tx_rx_queue_left = min_t(u16, ice_get_avail_txq_count(pf),
+				 ice_get_avail_rxq_count(pf));
+	max_allowed_vf_queues = tx_rx_queue_left + cur_queues;
+	if (!req_queues) {
+		dev_err(dev, "VF %d tried to request 0 queues. Ignoring.\n",
+			vf->vf_id);
+	} else if (req_queues > ICE_MAX_RSS_QS_PER_VF) {
+		dev_err(dev, "VF %d tried to request more than %d queues.\n",
+			vf->vf_id, ICE_MAX_RSS_QS_PER_VF);
+		vfres->num_queue_pairs = ICE_MAX_RSS_QS_PER_VF;
+	} else if (req_queues > cur_queues &&
+		   req_queues - cur_queues > tx_rx_queue_left) {
+		dev_warn(dev, "VF %d requested %u more queues, but only %u left.\n",
+			 vf->vf_id, req_queues - cur_queues, tx_rx_queue_left);
+		vfres->num_queue_pairs = min_t(u16, max_allowed_vf_queues,
+					       ICE_MAX_RSS_QS_PER_VF);
+	} else {
+		/* request is successful, then reset VF */
+		vf->num_req_qs = req_queues;
+		ice_vc_reset_vf(vf);
+		dev_info(dev, "VF %d granted request of %u queues.\n",
+			 vf->vf_id, req_queues);
+		return 0;
+	}
+
+error_param:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES,
+				     v_ret, (u8 *)vfres, sizeof(*vfres));
+}
+
+/**
+ * ice_set_vf_port_vlan
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @vlan_id: VLAN ID being set
+ * @qos: priority setting
+ * @vlan_proto: VLAN protocol
+ *
+ * program VF Port VLAN ID and/or QoS
+ */
+int
+ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
+		     __be16 vlan_proto)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct device *dev;
+	struct ice_vf *vf;
+	u16 vlanprio;
+	int ret;
+
+	dev = ice_pf_to_dev(pf);
+	if (ice_validate_vf_id(pf, vf_id))
+		return -EINVAL;
+
+	if (vlan_id >= VLAN_N_VID || qos > 7) {
+		dev_err(dev, "Invalid Port VLAN parameters for VF %d, ID %d, QoS %d\n",
+			vf_id, vlan_id, qos);
+		return -EINVAL;
+	}
+
+	if (vlan_proto != htons(ETH_P_8021Q)) {
+		dev_err(dev, "VF VLAN protocol is not supported\n");
+		return -EPROTONOSUPPORT;
+	}
+
+	vf = &pf->vf[vf_id];
+	ret = ice_check_vf_ready_for_cfg(vf);
+	if (ret)
+		return ret;
+
+	vlanprio = vlan_id | (qos << VLAN_PRIO_SHIFT);
+
+	if (vf->port_vlan_info == vlanprio) {
+		/* duplicate request, so just return success */
+		dev_dbg(dev, "Duplicate pvid %d request\n", vlanprio);
+		return 0;
+	}
+
+	mutex_lock(&vf->cfg_lock);
+
+	vf->port_vlan_info = vlanprio;
+
+	if (vf->port_vlan_info)
+		dev_info(dev, "Setting VLAN %d, QoS 0x%x on VF %d\n",
+			 vlan_id, qos, vf_id);
+	else
+		dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id);
+
+	ice_vc_reset_vf(vf);
+	mutex_unlock(&vf->cfg_lock);
+
+	return 0;
+}
+
+/**
+ * ice_vf_vlan_offload_ena - determine if capabilities support VLAN offloads
+ * @caps: VF driver negotiated capabilities
+ *
+ * Return true if VIRTCHNL_VF_OFFLOAD_VLAN capability is set, else return false
+ */
+static bool ice_vf_vlan_offload_ena(u32 caps)
+{
+	return !!(caps & VIRTCHNL_VF_OFFLOAD_VLAN);
+}
+
+/**
+ * ice_vc_process_vlan_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @add_v: Add VLAN if true, otherwise delete VLAN
+ *
+ * Process virtchnl op to add or remove programmed guest VLAN ID
+ */
+static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_vlan_filter_list *vfl =
+	    (struct virtchnl_vlan_filter_list *)msg;
+	struct ice_pf *pf = vf->pf;
+	bool vlan_promisc = false;
+	struct ice_vsi *vsi;
+	struct device *dev;
+	struct ice_hw *hw;
+	int status = 0;
+	u8 promisc_m;
+	int i;
+
+	dev = ice_pf_to_dev(pf);
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vfl->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	for (i = 0; i < vfl->num_elements; i++) {
+		if (vfl->vlan_id[i] >= VLAN_N_VID) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			dev_err(dev, "invalid VF VLAN id %d\n",
+				vfl->vlan_id[i]);
+			goto error_param;
+		}
+	}
+
+	hw = &pf->hw;
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (add_v && !ice_is_vf_trusted(vf) &&
+	    vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) {
+		dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
+			 vf->vf_id);
+		/* There is no need to let VF know about being not trusted,
+		 * so we can just return success message here
+		 */
+		goto error_param;
+	}
+
+	if (vsi->info.pvid) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if ((test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
+	     test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) &&
+	    test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags))
+		vlan_promisc = true;
+
+	if (add_v) {
+		for (i = 0; i < vfl->num_elements; i++) {
+			u16 vid = vfl->vlan_id[i];
+
+			if (!ice_is_vf_trusted(vf) &&
+			    vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) {
+				dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
+					 vf->vf_id);
+				/* There is no need to let VF know about being
+				 * not trusted, so we can just return success
+				 * message here as well.
+				 */
+				goto error_param;
+			}
+
+			/* we add VLAN 0 by default for each VF so we can enable
+			 * Tx VLAN anti-spoof without triggering MDD events so
+			 * we don't need to add it again here
+			 */
+			if (!vid)
+				continue;
+
+			status = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI);
+			if (status) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			/* Enable VLAN pruning when non-zero VLAN is added */
+			if (!vlan_promisc && vid &&
+			    !ice_vsi_is_vlan_pruning_ena(vsi)) {
+				status = ice_cfg_vlan_pruning(vsi, true, false);
+				if (status) {
+					v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+					dev_err(dev, "Enable VLAN pruning on VLAN ID: %d failed error-%d\n",
+						vid, status);
+					goto error_param;
+				}
+			} else if (vlan_promisc) {
+				/* Enable Ucast/Mcast VLAN promiscuous mode */
+				promisc_m = ICE_PROMISC_VLAN_TX |
+					    ICE_PROMISC_VLAN_RX;
+
+				status = ice_set_vsi_promisc(hw, vsi->idx,
+							     promisc_m, vid);
+				if (status) {
+					v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+					dev_err(dev, "Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n",
+						vid, status);
+				}
+			}
+		}
+	} else {
+		/* In case of non_trusted VF, number of VLAN elements passed
+		 * to PF for removal might be greater than number of VLANs
+		 * filter programmed for that VF - So, use actual number of
+		 * VLANS added earlier with add VLAN opcode. In order to avoid
+		 * removing VLAN that doesn't exist, which result to sending
+		 * erroneous failed message back to the VF
+		 */
+		int num_vf_vlan;
+
+		num_vf_vlan = vsi->num_vlan;
+		for (i = 0; i < vfl->num_elements && i < num_vf_vlan; i++) {
+			u16 vid = vfl->vlan_id[i];
+
+			/* we add VLAN 0 by default for each VF so we can enable
+			 * Tx VLAN anti-spoof without triggering MDD events so
+			 * we don't want a VIRTCHNL request to remove it
+			 */
+			if (!vid)
+				continue;
+
+			/* Make sure ice_vsi_kill_vlan is successful before
+			 * updating VLAN information
+			 */
+			status = ice_vsi_kill_vlan(vsi, vid);
+			if (status) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			/* Disable VLAN pruning when only VLAN 0 is left */
+			if (vsi->num_vlan == 1 &&
+			    ice_vsi_is_vlan_pruning_ena(vsi))
+				ice_cfg_vlan_pruning(vsi, false, false);
+
+			/* Disable Unicast/Multicast VLAN promiscuous mode */
+			if (vlan_promisc) {
+				promisc_m = ICE_PROMISC_VLAN_TX |
+					    ICE_PROMISC_VLAN_RX;
+
+				ice_clear_vsi_promisc(hw, vsi->idx,
+						      promisc_m, vid);
+			}
+		}
+	}
+
+error_param:
+	/* send the response to the VF */
+	if (add_v)
+		return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_VLAN, v_ret,
+					     NULL, 0);
+	else
+		return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_VLAN, v_ret,
+					     NULL, 0);
+}
+
+/**
+ * ice_vc_add_vlan_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Add and program guest VLAN ID
+ */
+static int ice_vc_add_vlan_msg(struct ice_vf *vf, u8 *msg)
+{
+	return ice_vc_process_vlan_msg(vf, msg, true);
+}
+
+/**
+ * ice_vc_remove_vlan_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * remove programmed guest VLAN ID
+ */
+static int ice_vc_remove_vlan_msg(struct ice_vf *vf, u8 *msg)
+{
+	return ice_vc_process_vlan_msg(vf, msg, false);
+}
+
+/**
+ * ice_vc_ena_vlan_stripping
+ * @vf: pointer to the VF info
+ *
+ * Enable VLAN header stripping for a given VF
+ */
+static int ice_vc_ena_vlan_stripping(struct ice_vf *vf)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (ice_vsi_manage_vlan_stripping(vsi, true))
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+
+error_param:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING,
+				     v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_dis_vlan_stripping
+ * @vf: pointer to the VF info
+ *
+ * Disable VLAN header stripping for a given VF
+ */
+static int ice_vc_dis_vlan_stripping(struct ice_vf *vf)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (ice_vsi_manage_vlan_stripping(vsi, false))
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+
+error_param:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
+				     v_ret, NULL, 0);
+}
+
+/**
+ * ice_vf_init_vlan_stripping - enable/disable VLAN stripping on initialization
+ * @vf: VF to enable/disable VLAN stripping for on initialization
+ *
+ * If the VIRTCHNL_VF_OFFLOAD_VLAN flag is set enable VLAN stripping, else if
+ * the flag is cleared then we want to disable stripping. For example, the flag
+ * will be cleared when port VLANs are configured by the administrator before
+ * passing the VF to the guest or if the AVF driver doesn't support VLAN
+ * offloads.
+ */
+static int ice_vf_init_vlan_stripping(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+
+	if (!vsi)
+		return -EINVAL;
+
+	/* don't modify stripping if port VLAN is configured */
+	if (vsi->info.pvid)
+		return 0;
+
+	if (ice_vf_vlan_offload_ena(vf->driver_caps))
+		return ice_vsi_manage_vlan_stripping(vsi, true);
+	else
+		return ice_vsi_manage_vlan_stripping(vsi, false);
+}
+
+/**
+ * ice_vc_process_vf_msg - Process request from VF
+ * @pf: pointer to the PF structure
+ * @event: pointer to the AQ event
+ *
+ * called from the common asq/arq handler to
+ * process request from VF
+ */
+void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
+{
+	u32 v_opcode = le32_to_cpu(event->desc.cookie_high);
+	s16 vf_id = le16_to_cpu(event->desc.retval);
+	u16 msglen = event->msg_len;
+	u8 *msg = event->msg_buf;
+	struct ice_vf *vf = NULL;
+	struct device *dev;
+	int err = 0;
+
+	dev = ice_pf_to_dev(pf);
+	if (ice_validate_vf_id(pf, vf_id)) {
+		err = -EINVAL;
+		goto error_handler;
+	}
+
+	vf = &pf->vf[vf_id];
+
+	/* Check if VF is disabled. */
+	if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) {
+		err = -EPERM;
+		goto error_handler;
+	}
+
+	/* Perform basic checks on the msg */
+	err = virtchnl_vc_validate_vf_msg(&vf->vf_ver, v_opcode, msg, msglen);
+	if (err) {
+		if (err == VIRTCHNL_STATUS_ERR_PARAM)
+			err = -EPERM;
+		else
+			err = -EINVAL;
+	}
+
+error_handler:
+	if (err) {
+		ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_PARAM,
+				      NULL, 0);
+		dev_err(dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n",
+			vf_id, v_opcode, msglen, err);
+		return;
+	}
+
+	/* VF is being configured in another context that triggers a VFR, so no
+	 * need to process this message
+	 */
+	if (!mutex_trylock(&vf->cfg_lock)) {
+		dev_info(dev, "VF %u is being configured in another context that will trigger a VFR, so there is no need to handle this message\n",
+			 vf->vf_id);
+		return;
+	}
+
+	switch (v_opcode) {
+	case VIRTCHNL_OP_VERSION:
+		err = ice_vc_get_ver_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_GET_VF_RESOURCES:
+		err = ice_vc_get_vf_res_msg(vf, msg);
+		if (ice_vf_init_vlan_stripping(vf))
+			dev_err(dev, "Failed to initialize VLAN stripping for VF %d\n",
+				vf->vf_id);
+		ice_vc_notify_vf_link_state(vf);
+		break;
+	case VIRTCHNL_OP_RESET_VF:
+		ice_vc_reset_vf_msg(vf);
+		break;
+	case VIRTCHNL_OP_ADD_ETH_ADDR:
+		err = ice_vc_add_mac_addr_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_DEL_ETH_ADDR:
+		err = ice_vc_del_mac_addr_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_CONFIG_VSI_QUEUES:
+		err = ice_vc_cfg_qs_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_ENABLE_QUEUES:
+		err = ice_vc_ena_qs_msg(vf, msg);
+		ice_vc_notify_vf_link_state(vf);
+		break;
+	case VIRTCHNL_OP_DISABLE_QUEUES:
+		err = ice_vc_dis_qs_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_REQUEST_QUEUES:
+		err = ice_vc_request_qs_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_CONFIG_IRQ_MAP:
+		err = ice_vc_cfg_irq_map_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_CONFIG_RSS_KEY:
+		err = ice_vc_config_rss_key(vf, msg);
+		break;
+	case VIRTCHNL_OP_CONFIG_RSS_LUT:
+		err = ice_vc_config_rss_lut(vf, msg);
+		break;
+	case VIRTCHNL_OP_GET_STATS:
+		err = ice_vc_get_stats_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
+		err = ice_vc_cfg_promiscuous_mode_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_ADD_VLAN:
+		err = ice_vc_add_vlan_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_DEL_VLAN:
+		err = ice_vc_remove_vlan_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
+		err = ice_vc_ena_vlan_stripping(vf);
+		break;
+	case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
+		err = ice_vc_dis_vlan_stripping(vf);
+		break;
+	case VIRTCHNL_OP_UNKNOWN:
+	default:
+		dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode,
+			vf_id);
+		err = ice_vc_send_msg_to_vf(vf, v_opcode,
+					    VIRTCHNL_STATUS_ERR_NOT_SUPPORTED,
+					    NULL, 0);
+		break;
+	}
+	if (err) {
+		/* Helper function cares less about error return values here
+		 * as it is busy with pending work.
+		 */
+		dev_info(dev, "PF failed to honor VF %d, opcode %d, error %d\n",
+			 vf_id, v_opcode, err);
+	}
+
+	mutex_unlock(&vf->cfg_lock);
+}
+
+/**
+ * ice_get_vf_cfg
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @ivi: VF configuration structure
+ *
+ * return VF configuration
+ */
+int
+ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_vf *vf;
+
+	if (ice_validate_vf_id(pf, vf_id))
+		return -EINVAL;
+
+	vf = &pf->vf[vf_id];
+
+	if (ice_check_vf_init(pf, vf))
+		return -EBUSY;
+
+	ivi->vf = vf_id;
+	ether_addr_copy(ivi->mac, vf->dflt_lan_addr.addr);
+
+	/* VF configuration for VLAN and applicable QoS */
+	ivi->vlan = vf->port_vlan_info & VLAN_VID_MASK;
+	ivi->qos = (vf->port_vlan_info & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+
+	ivi->trusted = vf->trusted;
+	ivi->spoofchk = vf->spoofchk;
+	if (!vf->link_forced)
+		ivi->linkstate = IFLA_VF_LINK_STATE_AUTO;
+	else if (vf->link_up)
+		ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE;
+	else
+		ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE;
+	ivi->max_tx_rate = vf->tx_rate;
+	ivi->min_tx_rate = 0;
+	return 0;
+}
+
+/**
+ * ice_unicast_mac_exists - check if the unicast MAC exists on the PF's switch
+ * @pf: PF used to reference the switch's rules
+ * @umac: unicast MAC to compare against existing switch rules
+ *
+ * Return true on the first/any match, else return false
+ */
+static bool ice_unicast_mac_exists(struct ice_pf *pf, u8 *umac)
+{
+	struct ice_sw_recipe *mac_recipe_list =
+		&pf->hw.switch_info->recp_list[ICE_SW_LKUP_MAC];
+	struct ice_fltr_mgmt_list_entry *list_itr;
+	struct list_head *rule_head;
+	struct mutex *rule_lock; /* protect MAC filter list access */
+
+	rule_head = &mac_recipe_list->filt_rules;
+	rule_lock = &mac_recipe_list->filt_rule_lock;
+
+	mutex_lock(rule_lock);
+	list_for_each_entry(list_itr, rule_head, list_entry) {
+		u8 *existing_mac = &list_itr->fltr_info.l_data.mac.mac_addr[0];
+
+		if (ether_addr_equal(existing_mac, umac)) {
+			mutex_unlock(rule_lock);
+			return true;
+		}
+	}
+
+	mutex_unlock(rule_lock);
+
+	return false;
+}
+
+/**
+ * ice_set_vf_mac
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @mac: MAC address
+ *
+ * program VF MAC address
+ */
+int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_vf *vf;
+	int ret;
+
+	if (ice_validate_vf_id(pf, vf_id))
+		return -EINVAL;
+
+	if (is_multicast_ether_addr(mac)) {
+		netdev_err(netdev, "%pM not a valid unicast address\n", mac);
+		return -EINVAL;
+	}
+
+	vf = &pf->vf[vf_id];
+	/* nothing left to do, unicast MAC already set */
+	if (ether_addr_equal(vf->dflt_lan_addr.addr, mac))
+		return 0;
+
+	ret = ice_check_vf_ready_for_cfg(vf);
+	if (ret)
+		return ret;
+
+	if (ice_unicast_mac_exists(pf, mac)) {
+		netdev_err(netdev, "Unicast MAC %pM already exists on this PF. Preventing setting VF %u unicast MAC address to %pM\n",
+			   mac, vf_id, mac);
+		return -EINVAL;
+	}
+
+	mutex_lock(&vf->cfg_lock);
+
+	/* VF is notified of its new MAC via the PF's response to the
+	 * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset
+	 */
+	ether_addr_copy(vf->dflt_lan_addr.addr, mac);
+	if (is_zero_ether_addr(mac)) {
+		/* VF will send VIRTCHNL_OP_ADD_ETH_ADDR message with its MAC */
+		vf->pf_set_mac = false;
+		netdev_info(netdev, "Removing MAC on VF %d. VF driver will be reinitialized\n",
+			    vf->vf_id);
+	} else {
+		/* PF will add MAC rule for the VF */
+		vf->pf_set_mac = true;
+		netdev_info(netdev, "Setting MAC %pM on VF %d. VF driver will be reinitialized\n",
+			    mac, vf_id);
+	}
+
+	ice_vc_reset_vf(vf);
+	mutex_unlock(&vf->cfg_lock);
+	return 0;
+}
+
+/**
+ * ice_set_vf_trust
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @trusted: Boolean value to enable/disable trusted VF
+ *
+ * Enable or disable a given VF as trusted
+ */
+int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_vf *vf;
+	int ret;
+
+	if (ice_validate_vf_id(pf, vf_id))
+		return -EINVAL;
+
+	vf = &pf->vf[vf_id];
+	ret = ice_check_vf_ready_for_cfg(vf);
+	if (ret)
+		return ret;
+
+	/* Check if already trusted */
+	if (trusted == vf->trusted)
+		return 0;
+
+	mutex_lock(&vf->cfg_lock);
+
+	vf->trusted = trusted;
+	ice_vc_reset_vf(vf);
+	dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n",
+		 vf_id, trusted ? "" : "un");
+
+	mutex_unlock(&vf->cfg_lock);
+
+	return 0;
+}
+
+/**
+ * ice_set_vf_link_state
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @link_state: required link state
+ *
+ * Set VF's link state, irrespective of physical link state status
+ */
+int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_vf *vf;
+	int ret;
+
+	if (ice_validate_vf_id(pf, vf_id))
+		return -EINVAL;
+
+	vf = &pf->vf[vf_id];
+	ret = ice_check_vf_ready_for_cfg(vf);
+	if (ret)
+		return ret;
+
+	switch (link_state) {
+	case IFLA_VF_LINK_STATE_AUTO:
+		vf->link_forced = false;
+		break;
+	case IFLA_VF_LINK_STATE_ENABLE:
+		vf->link_forced = true;
+		vf->link_up = true;
+		break;
+	case IFLA_VF_LINK_STATE_DISABLE:
+		vf->link_forced = true;
+		vf->link_up = false;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ice_vc_notify_vf_link_state(vf);
+
+	return 0;
+}
+
+/**
+ * ice_get_vf_stats - populate some stats for the VF
+ * @netdev: the netdev of the PF
+ * @vf_id: the host OS identifier (0-255)
+ * @vf_stats: pointer to the OS memory to be initialized
+ */
+int ice_get_vf_stats(struct net_device *netdev, int vf_id,
+		     struct ifla_vf_stats *vf_stats)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_eth_stats *stats;
+	struct ice_vsi *vsi;
+	struct ice_vf *vf;
+	int ret;
+
+	if (ice_validate_vf_id(pf, vf_id))
+		return -EINVAL;
+
+	vf = &pf->vf[vf_id];
+	ret = ice_check_vf_ready_for_cfg(vf);
+	if (ret)
+		return ret;
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi)
+		return -EINVAL;
+
+	ice_update_eth_stats(vsi);
+	stats = &vsi->eth_stats;
+
+	memset(vf_stats, 0, sizeof(*vf_stats));
+
+	vf_stats->rx_packets = stats->rx_unicast + stats->rx_broadcast +
+		stats->rx_multicast;
+	vf_stats->tx_packets = stats->tx_unicast + stats->tx_broadcast +
+		stats->tx_multicast;
+	vf_stats->rx_bytes   = stats->rx_bytes;
+	vf_stats->tx_bytes   = stats->tx_bytes;
+	vf_stats->broadcast  = stats->rx_broadcast;
+	vf_stats->multicast  = stats->rx_multicast;
+	vf_stats->rx_dropped = stats->rx_discards;
+	vf_stats->tx_dropped = stats->tx_discards;
+
+	return 0;
+}
+
+/**
+ * ice_print_vf_rx_mdd_event - print VF Rx malicious driver detect event
+ * @vf: pointer to the VF structure
+ */
+void ice_print_vf_rx_mdd_event(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+
+	dev_info(dev, "%d Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pM. mdd-auto-reset-vfs=%s\n",
+		 vf->mdd_rx_events.count, pf->hw.pf_id, vf->vf_id,
+		 vf->dflt_lan_addr.addr,
+		 test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)
+			  ? "on" : "off");
+}
+
+/**
+ * ice_print_vfs_mdd_event - print VFs malicious driver detect event
+ * @pf: pointer to the PF structure
+ *
+ * Called from ice_handle_mdd_event to rate limit and print VFs MDD events.
+ */
+void ice_print_vfs_mdd_events(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	int i;
+
+	/* check that there are pending MDD events to print */
+	if (!test_and_clear_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state))
+		return;
+
+	/* VF MDD event logs are rate limited to one second intervals */
+	if (time_is_after_jiffies(pf->last_printed_mdd_jiffies + HZ * 1))
+		return;
+
+	pf->last_printed_mdd_jiffies = jiffies;
+
+	ice_for_each_vf(pf, i) {
+		struct ice_vf *vf = &pf->vf[i];
+
+		/* only print Rx MDD event message if there are new events */
+		if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) {
+			vf->mdd_rx_events.last_printed =
+							vf->mdd_rx_events.count;
+			ice_print_vf_rx_mdd_event(vf);
+		}
+
+		/* only print Tx MDD event message if there are new events */
+		if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) {
+			vf->mdd_tx_events.last_printed =
+							vf->mdd_tx_events.count;
+
+			dev_info(dev, "%d Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pM.\n",
+				 vf->mdd_tx_events.count, hw->pf_id, i,
+				 vf->dflt_lan_addr.addr);
+		}
+	}
+}
+
+/**
+ * ice_restore_all_vfs_msi_state - restore VF MSI state after PF FLR
+ * @pdev: pointer to a pci_dev structure
+ *
+ * Called when recovering from a PF FLR to restore interrupt capability to
+ * the VFs.
+ */
+void ice_restore_all_vfs_msi_state(struct pci_dev *pdev)
+{
+	struct pci_dev *vfdev;
+	u16 vf_id;
+	int pos;
+
+	if (!pci_num_vf(pdev))
+		return;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+	if (pos) {
+		pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID,
+				     &vf_id);
+		vfdev = pci_get_device(pdev->vendor, vf_id, NULL);
+		while (vfdev) {
+			if (vfdev->is_virtfn && vfdev->physfn == pdev)
+				pci_restore_msi_state(vfdev);
+			vfdev = pci_get_device(pdev->vendor, vf_id,
+					       vfdev);
+		}
+	}
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
new file mode 100644
index 000000000..d2e935c67
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_VIRTCHNL_PF_H_
+#define _ICE_VIRTCHNL_PF_H_
+#include "ice.h"
+
+/* Restrict number of MAC Addr and VLAN that non-trusted VF can programmed */
+#define ICE_MAX_VLAN_PER_VF		8
+/* MAC filters: 1 is reserved for the VF's default/perm_addr/LAA MAC, 1 for
+ * broadcast, and 16 for additional unicast/multicast filters
+ */
+#define ICE_MAX_MACADDR_PER_VF		18
+
+/* Malicious Driver Detection */
+#define ICE_MDD_EVENTS_THRESHOLD		30
+
+/* Static VF transaction/status register def */
+#define VF_DEVICE_STATUS		0xAA
+#define VF_TRANS_PENDING_M		0x20
+
+/* wait defines for polling PF_PCI_CIAD register status */
+#define ICE_PCI_CIAD_WAIT_COUNT		100
+#define ICE_PCI_CIAD_WAIT_DELAY_US	1
+
+/* VF resource constraints */
+#define ICE_MAX_VF_COUNT		256
+#define ICE_MIN_QS_PER_VF		1
+#define ICE_NONQ_VECS_VF		1
+#define ICE_MAX_SCATTER_QS_PER_VF	16
+#define ICE_MAX_RSS_QS_PER_VF		16
+#define ICE_NUM_VF_MSIX_MED		17
+#define ICE_NUM_VF_MSIX_SMALL		5
+#define ICE_NUM_VF_MSIX_MULTIQ_MIN	3
+#define ICE_MIN_INTR_PER_VF		(ICE_MIN_QS_PER_VF + 1)
+#define ICE_MAX_VF_RESET_TRIES		40
+#define ICE_MAX_VF_RESET_SLEEP_MS	20
+
+#define ice_for_each_vf(pf, i) \
+	for ((i) = 0; (i) < (pf)->num_alloc_vfs; (i)++)
+
+/* Specific VF states */
+enum ice_vf_states {
+	ICE_VF_STATE_INIT = 0,		/* PF is initializing VF */
+	ICE_VF_STATE_ACTIVE,		/* VF resources are allocated for use */
+	ICE_VF_STATE_QS_ENA,		/* VF queue(s) enabled */
+	ICE_VF_STATE_DIS,
+	ICE_VF_STATE_MC_PROMISC,
+	ICE_VF_STATE_UC_PROMISC,
+	ICE_VF_STATES_NBITS
+};
+
+/* VF capabilities */
+enum ice_virtchnl_cap {
+	ICE_VIRTCHNL_VF_CAP_L2 = 0,
+	ICE_VIRTCHNL_VF_CAP_PRIVILEGE,
+};
+
+/* VF MDD events print structure */
+struct ice_mdd_vf_events {
+	u16 count;			/* total count of Rx|Tx events */
+	/* count number of the last printed event */
+	u16 last_printed;
+};
+
+/* VF information structure */
+struct ice_vf {
+	struct ice_pf *pf;
+
+	/* Used during virtchnl message handling and NDO ops against the VF
+	 * that will trigger a VFR
+	 */
+	struct mutex cfg_lock;
+
+	u16 vf_id;			/* VF ID in the PF space */
+	u16 lan_vsi_idx;		/* index into PF struct */
+	/* first vector index of this VF in the PF space */
+	int first_vector_idx;
+	struct ice_sw *vf_sw_id;	/* switch ID the VF VSIs connect to */
+	struct virtchnl_version_info vf_ver;
+	u32 driver_caps;		/* reported by VF driver */
+	struct virtchnl_ether_addr dflt_lan_addr;
+	DECLARE_BITMAP(txq_ena, ICE_MAX_RSS_QS_PER_VF);
+	DECLARE_BITMAP(rxq_ena, ICE_MAX_RSS_QS_PER_VF);
+	u16 port_vlan_info;		/* Port VLAN ID and QoS */
+	u8 pf_set_mac:1;		/* VF MAC address set by VMM admin */
+	u8 trusted:1;
+	u8 spoofchk:1;
+	u8 link_forced:1;
+	u8 link_up:1;			/* only valid if VF link is forced */
+	/* VSI indices - actual VSI pointers are maintained in the PF structure
+	 * When assigned, these will be non-zero, because VSI 0 is always
+	 * the main LAN VSI for the PF.
+	 */
+	u16 lan_vsi_num;		/* ID as used by firmware */
+	unsigned int tx_rate;		/* Tx bandwidth limit in Mbps */
+	DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS);	/* VF runtime states */
+
+	unsigned long vf_caps;		/* VF's adv. capabilities */
+	u8 num_req_qs;			/* num of queue pairs requested by VF */
+	u16 num_mac;
+	u16 num_vf_qs;			/* num of queue configured per VF */
+	struct ice_mdd_vf_events mdd_rx_events;
+	struct ice_mdd_vf_events mdd_tx_events;
+};
+
+#ifdef CONFIG_PCI_IOV
+void ice_process_vflr_event(struct ice_pf *pf);
+int ice_sriov_configure(struct pci_dev *pdev, int num_vfs);
+int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac);
+int
+ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi);
+
+void ice_free_vfs(struct ice_pf *pf);
+void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event);
+void ice_vc_notify_link_state(struct ice_pf *pf);
+void ice_vc_notify_reset(struct ice_pf *pf);
+bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr);
+bool ice_reset_vf(struct ice_vf *vf, bool is_vflr);
+void ice_restore_all_vfs_msi_state(struct pci_dev *pdev);
+
+int
+ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
+		     __be16 vlan_proto);
+
+int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted);
+
+int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state);
+
+int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena);
+
+int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector);
+
+void ice_set_vf_state_qs_dis(struct ice_vf *vf);
+int
+ice_get_vf_stats(struct net_device *netdev, int vf_id,
+		 struct ifla_vf_stats *vf_stats);
+bool ice_is_any_vf_in_promisc(struct ice_pf *pf);
+void
+ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event);
+void ice_print_vfs_mdd_events(struct ice_pf *pf);
+void ice_print_vf_rx_mdd_event(struct ice_vf *vf);
+#else /* CONFIG_PCI_IOV */
+#define ice_process_vflr_event(pf) do {} while (0)
+#define ice_free_vfs(pf) do {} while (0)
+#define ice_vc_process_vf_msg(pf, event) do {} while (0)
+#define ice_vc_notify_link_state(pf) do {} while (0)
+#define ice_vc_notify_reset(pf) do {} while (0)
+#define ice_set_vf_state_qs_dis(vf) do {} while (0)
+#define ice_vf_lan_overflow_event(pf, event) do {} while (0)
+#define ice_print_vfs_mdd_events(pf) do {} while (0)
+#define ice_print_vf_rx_mdd_event(vf) do {} while (0)
+#define ice_restore_all_vfs_msi_state(pdev) do {} while (0)
+
+static inline bool
+ice_reset_all_vfs(struct ice_pf __always_unused *pf,
+		  bool __always_unused is_vflr)
+{
+	return true;
+}
+
+static inline bool
+ice_reset_vf(struct ice_vf __always_unused *vf, bool __always_unused is_vflr)
+{
+	return true;
+}
+
+static inline int
+ice_sriov_configure(struct pci_dev __always_unused *pdev,
+		    int __always_unused num_vfs)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_mac(struct net_device __always_unused *netdev,
+	       int __always_unused vf_id, u8 __always_unused *mac)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_get_vf_cfg(struct net_device __always_unused *netdev,
+	       int __always_unused vf_id,
+	       struct ifla_vf_info __always_unused *ivi)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_trust(struct net_device __always_unused *netdev,
+		 int __always_unused vf_id, bool __always_unused trusted)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_port_vlan(struct net_device __always_unused *netdev,
+		     int __always_unused vf_id, u16 __always_unused vid,
+		     u8 __always_unused qos, __be16 __always_unused v_proto)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_spoofchk(struct net_device __always_unused *netdev,
+		    int __always_unused vf_id, bool __always_unused ena)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_link_state(struct net_device __always_unused *netdev,
+		      int __always_unused vf_id, int __always_unused link_state)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf,
+		    struct ice_q_vector __always_unused *q_vector)
+{
+	return 0;
+}
+
+static inline int
+ice_get_vf_stats(struct net_device __always_unused *netdev,
+		 int __always_unused vf_id,
+		 struct ifla_vf_stats __always_unused *vf_stats)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline bool ice_is_any_vf_in_promisc(struct ice_pf __always_unused *pf)
+{
+	return false;
+}
+#endif /* CONFIG_PCI_IOV */
+#endif /* _ICE_VIRTCHNL_PF_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
new file mode 100644
index 000000000..e0790df70
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -0,0 +1,927 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock_drv.h>
+#include <net/xdp.h>
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_type.h"
+#include "ice_xsk.h"
+#include "ice_txrx.h"
+#include "ice_txrx_lib.h"
+#include "ice_lib.h"
+
+/**
+ * ice_qp_reset_stats - Resets all stats for rings of given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
+{
+	memset(&vsi->rx_rings[q_idx]->rx_stats, 0,
+	       sizeof(vsi->rx_rings[q_idx]->rx_stats));
+	memset(&vsi->tx_rings[q_idx]->stats, 0,
+	       sizeof(vsi->tx_rings[q_idx]->stats));
+	if (ice_is_xdp_ena_vsi(vsi))
+		memset(&vsi->xdp_rings[q_idx]->stats, 0,
+		       sizeof(vsi->xdp_rings[q_idx]->stats));
+}
+
+/**
+ * ice_qp_clean_rings - Cleans all the rings of a given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
+{
+	ice_clean_tx_ring(vsi->tx_rings[q_idx]);
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		synchronize_rcu();
+		ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
+	}
+	ice_clean_rx_ring(vsi->rx_rings[q_idx]);
+}
+
+/**
+ * ice_qvec_toggle_napi - Enables/disables NAPI for a given q_vector
+ * @vsi: VSI that has netdev
+ * @q_vector: q_vector that has NAPI context
+ * @enable: true for enable, false for disable
+ */
+static void
+ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector,
+		     bool enable)
+{
+	if (!vsi->netdev || !q_vector)
+		return;
+
+	if (enable)
+		napi_enable(&q_vector->napi);
+	else
+		napi_disable(&q_vector->napi);
+}
+
+/**
+ * ice_qvec_dis_irq - Mask off queue interrupt generation on given ring
+ * @vsi: the VSI that contains queue vector being un-configured
+ * @rx_ring: Rx ring that will have its IRQ disabled
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_ring *rx_ring,
+		 struct ice_q_vector *q_vector)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	int base = vsi->base_vector;
+	u16 reg;
+	u32 val;
+
+	/* QINT_TQCTL is being cleared in ice_vsi_stop_tx_ring, so handle
+	 * here only QINT_RQCTL
+	 */
+	reg = rx_ring->reg_idx;
+	val = rd32(hw, QINT_RQCTL(reg));
+	val &= ~QINT_RQCTL_CAUSE_ENA_M;
+	wr32(hw, QINT_RQCTL(reg), val);
+
+	if (q_vector) {
+		u16 v_idx = q_vector->v_idx;
+
+		wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0);
+		ice_flush(hw);
+		synchronize_irq(pf->msix_entries[v_idx + base].vector);
+	}
+}
+
+/**
+ * ice_qvec_cfg_msix - Enable IRQ for given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+	u16 reg_idx = q_vector->reg_idx;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_ring *ring;
+
+	ice_cfg_itr(hw, q_vector);
+
+	wr32(hw, GLINT_RATE(reg_idx),
+	     ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran));
+
+	ice_for_each_ring(ring, q_vector->tx)
+		ice_cfg_txq_interrupt(vsi, ring->reg_idx, reg_idx,
+				      q_vector->tx.itr_idx);
+
+	ice_for_each_ring(ring, q_vector->rx)
+		ice_cfg_rxq_interrupt(vsi, ring->reg_idx, reg_idx,
+				      q_vector->rx.itr_idx);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_qvec_ena_irq - Enable IRQ for given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+
+	ice_irq_dynamic_ena(hw, vsi, q_vector);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_qp_dis - Disables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
+{
+	struct ice_txq_meta txq_meta = { };
+	struct ice_ring *tx_ring, *rx_ring;
+	struct ice_q_vector *q_vector;
+	int timeout = 50;
+	int err;
+
+	if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+		return -EINVAL;
+
+	tx_ring = vsi->tx_rings[q_idx];
+	rx_ring = vsi->rx_rings[q_idx];
+	q_vector = rx_ring->q_vector;
+
+	while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) {
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+		usleep_range(1000, 2000);
+	}
+	netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+
+	ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
+	err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
+	if (err)
+		return err;
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+		memset(&txq_meta, 0, sizeof(txq_meta));
+		ice_fill_txq_meta(vsi, xdp_ring, &txq_meta);
+		err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring,
+					   &txq_meta);
+		if (err)
+			return err;
+	}
+	ice_qvec_dis_irq(vsi, rx_ring, q_vector);
+
+	err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true);
+	if (err)
+		return err;
+
+	ice_qvec_toggle_napi(vsi, q_vector, false);
+	ice_qp_clean_rings(vsi, q_idx);
+	ice_qp_reset_stats(vsi, q_idx);
+
+	return 0;
+}
+
+/**
+ * ice_qp_ena - Enables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
+{
+	struct ice_aqc_add_tx_qgrp *qg_buf;
+	struct ice_ring *tx_ring, *rx_ring;
+	struct ice_q_vector *q_vector;
+	u16 size;
+	int err;
+
+	if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+		return -EINVAL;
+
+	size = struct_size(qg_buf, txqs, 1);
+	qg_buf = kzalloc(size, GFP_KERNEL);
+	if (!qg_buf)
+		return -ENOMEM;
+
+	qg_buf->num_txqs = 1;
+
+	tx_ring = vsi->tx_rings[q_idx];
+	rx_ring = vsi->rx_rings[q_idx];
+	q_vector = rx_ring->q_vector;
+
+	err = ice_vsi_cfg_txq(vsi, tx_ring, qg_buf);
+	if (err)
+		goto free_buf;
+
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+		memset(qg_buf, 0, size);
+		qg_buf->num_txqs = 1;
+		err = ice_vsi_cfg_txq(vsi, xdp_ring, qg_buf);
+		if (err)
+			goto free_buf;
+		ice_set_ring_xdp(xdp_ring);
+		xdp_ring->xsk_pool = ice_xsk_pool(xdp_ring);
+	}
+
+	err = ice_setup_rx_ctx(rx_ring);
+	if (err)
+		goto free_buf;
+
+	ice_qvec_cfg_msix(vsi, q_vector);
+
+	err = ice_vsi_ctrl_one_rx_ring(vsi, true, q_idx, true);
+	if (err)
+		goto free_buf;
+
+	clear_bit(__ICE_CFG_BUSY, vsi->state);
+	ice_qvec_toggle_napi(vsi, q_vector, true);
+	ice_qvec_ena_irq(vsi, q_vector);
+
+	netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+free_buf:
+	kfree(qg_buf);
+	return err;
+}
+
+/**
+ * ice_xsk_alloc_pools - allocate a buffer pool for an XDP socket
+ * @vsi: VSI to allocate the buffer pool on
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_alloc_pools(struct ice_vsi *vsi)
+{
+	if (vsi->xsk_pools)
+		return 0;
+
+	vsi->xsk_pools = kcalloc(vsi->num_xsk_pools, sizeof(*vsi->xsk_pools),
+				 GFP_KERNEL);
+
+	if (!vsi->xsk_pools) {
+		vsi->num_xsk_pools = 0;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_xsk_remove_pool - Remove an buffer pool for a certain ring/qid
+ * @vsi: VSI from which the VSI will be removed
+ * @qid: Ring/qid associated with the buffer pool
+ */
+static void ice_xsk_remove_pool(struct ice_vsi *vsi, u16 qid)
+{
+	vsi->xsk_pools[qid] = NULL;
+	vsi->num_xsk_pools_used--;
+
+	if (vsi->num_xsk_pools_used == 0) {
+		kfree(vsi->xsk_pools);
+		vsi->xsk_pools = NULL;
+		vsi->num_xsk_pools = 0;
+	}
+}
+
+/**
+ * ice_xsk_pool_disable - disable a buffer pool region
+ * @vsi: Current VSI
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid)
+{
+	if (!vsi->xsk_pools || qid >= vsi->num_xsk_pools ||
+	    !vsi->xsk_pools[qid])
+		return -EINVAL;
+
+	xsk_pool_dma_unmap(vsi->xsk_pools[qid], ICE_RX_DMA_ATTR);
+	ice_xsk_remove_pool(vsi, qid);
+
+	return 0;
+}
+
+/**
+ * ice_xsk_pool_enable - enable a buffer pool region
+ * @vsi: Current VSI
+ * @pool: pointer to a requested buffer pool region
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int
+ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
+{
+	int err;
+
+	if (vsi->type != ICE_VSI_PF)
+		return -EINVAL;
+
+	if (!vsi->num_xsk_pools)
+		vsi->num_xsk_pools = min_t(u16, vsi->num_rxq, vsi->num_txq);
+	if (qid >= vsi->num_xsk_pools)
+		return -EINVAL;
+
+	err = ice_xsk_alloc_pools(vsi);
+	if (err)
+		return err;
+
+	if (vsi->xsk_pools && vsi->xsk_pools[qid])
+		return -EBUSY;
+
+	vsi->xsk_pools[qid] = pool;
+	vsi->num_xsk_pools_used++;
+
+	err = xsk_pool_dma_map(vsi->xsk_pools[qid], ice_pf_to_dev(vsi->back),
+			       ICE_RX_DMA_ATTR);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/**
+ * ice_xsk_pool_setup - enable/disable a buffer pool region depending on its state
+ * @vsi: Current VSI
+ * @pool: buffer pool to enable/associate to a ring, NULL to disable
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
+{
+	bool if_running, pool_present = !!pool;
+	int ret = 0, pool_failure = 0;
+
+	if (qid >= vsi->num_rxq || qid >= vsi->num_txq) {
+		netdev_err(vsi->netdev, "Please use queue id in scope of combined queues count\n");
+		pool_failure = -EINVAL;
+		goto failure;
+	}
+
+	if (!is_power_of_2(vsi->rx_rings[qid]->count) ||
+	    !is_power_of_2(vsi->tx_rings[qid]->count)) {
+		netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n");
+		pool_failure = -EINVAL;
+		goto failure;
+	}
+
+	if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
+
+	if (if_running) {
+		ret = ice_qp_dis(vsi, qid);
+		if (ret) {
+			netdev_err(vsi->netdev, "ice_qp_dis error = %d\n", ret);
+			goto xsk_pool_if_up;
+		}
+	}
+
+	pool_failure = pool_present ? ice_xsk_pool_enable(vsi, pool, qid) :
+				      ice_xsk_pool_disable(vsi, qid);
+
+xsk_pool_if_up:
+	if (if_running) {
+		ret = ice_qp_ena(vsi, qid);
+		if (!ret && pool_present)
+			napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi);
+		else if (ret)
+			netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret);
+	}
+
+failure:
+	if (pool_failure) {
+		netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n",
+			   pool_present ? "en" : "dis", pool_failure);
+		return pool_failure;
+	}
+
+	return ret;
+}
+
+/**
+ * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ *
+ * This function allocates a number of Rx buffers from the fill ring
+ * or the internal recycle mechanism and places them on the Rx ring.
+ *
+ * Returns false if all allocations were successful, true if any fail.
+ */
+bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count)
+{
+	union ice_32b_rx_flex_desc *rx_desc;
+	u16 ntu = rx_ring->next_to_use;
+	struct ice_rx_buf *rx_buf;
+	bool ret = false;
+	dma_addr_t dma;
+
+	if (!count)
+		return false;
+
+	rx_desc = ICE_RX_DESC(rx_ring, ntu);
+	rx_buf = &rx_ring->rx_buf[ntu];
+
+	do {
+		rx_buf->xdp = xsk_buff_alloc(rx_ring->xsk_pool);
+		if (!rx_buf->xdp) {
+			ret = true;
+			break;
+		}
+
+		dma = xsk_buff_xdp_get_dma(rx_buf->xdp);
+		rx_desc->read.pkt_addr = cpu_to_le64(dma);
+		rx_desc->wb.status_error0 = 0;
+
+		rx_desc++;
+		rx_buf++;
+		ntu++;
+
+		if (unlikely(ntu == rx_ring->count)) {
+			rx_desc = ICE_RX_DESC(rx_ring, 0);
+			rx_buf = rx_ring->rx_buf;
+			ntu = 0;
+		}
+	} while (--count);
+
+	if (rx_ring->next_to_use != ntu) {
+		/* clear the status bits for the next_to_use descriptor */
+		rx_desc->wb.status_error0 = 0;
+		ice_release_rx_desc(rx_ring, ntu);
+	}
+
+	return ret;
+}
+
+/**
+ * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring
+ * @rx_ring: Rx ring
+ */
+static void ice_bump_ntc(struct ice_ring *rx_ring)
+{
+	int ntc = rx_ring->next_to_clean + 1;
+
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+	prefetch(ICE_RX_DESC(rx_ring, ntc));
+}
+
+/**
+ * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
+ * @rx_ring: Rx ring
+ * @rx_buf: zero-copy Rx buffer
+ *
+ * This function allocates a new skb from a zero-copy Rx buffer.
+ *
+ * Returns the skb on success, NULL on failure.
+ */
+static struct sk_buff *
+ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
+{
+	unsigned int metasize = rx_buf->xdp->data - rx_buf->xdp->data_meta;
+	unsigned int datasize = rx_buf->xdp->data_end - rx_buf->xdp->data;
+	unsigned int datasize_hard = rx_buf->xdp->data_end -
+				     rx_buf->xdp->data_hard_start;
+	struct sk_buff *skb;
+
+	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
+
+	skb_reserve(skb, rx_buf->xdp->data - rx_buf->xdp->data_hard_start);
+	memcpy(__skb_put(skb, datasize), rx_buf->xdp->data, datasize);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
+	xsk_buff_free(rx_buf->xdp);
+	rx_buf->xdp = NULL;
+	return skb;
+}
+
+/**
+ * ice_run_xdp_zc - Executes an XDP program in zero-copy path
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static int
+ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
+{
+	int err, result = ICE_XDP_PASS;
+	struct bpf_prog *xdp_prog;
+	struct ice_ring *xdp_ring;
+	u32 act;
+
+	rcu_read_lock();
+	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+	if (!xdp_prog) {
+		rcu_read_unlock();
+		return ICE_XDP_PASS;
+	}
+
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+
+	if (likely(act == XDP_REDIRECT)) {
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		if (err)
+			goto out_failure;
+		rcu_read_unlock();
+		return ICE_XDP_REDIR;
+	}
+
+	switch (act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index];
+		result = ice_xmit_xdp_buff(xdp, xdp_ring);
+		if (result == ICE_XDP_CONSUMED)
+			goto out_failure;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		fallthrough;
+	case XDP_ABORTED:
+out_failure:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+		fallthrough;
+	case XDP_DROP:
+		result = ICE_XDP_CONSUMED;
+		break;
+	}
+
+	rcu_read_unlock();
+	return result;
+}
+
+/**
+ * ice_clean_rx_irq_zc - consumes packets from the hardware ring
+ * @rx_ring: AF_XDP Rx ring
+ * @budget: NAPI budget
+ *
+ * Returns number of processed packets on success, remaining budget on failure.
+ */
+int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
+{
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+	u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+	unsigned int xdp_xmit = 0;
+	bool failure = false;
+
+	while (likely(total_rx_packets < (unsigned int)budget)) {
+		union ice_32b_rx_flex_desc *rx_desc;
+		unsigned int size, xdp_res = 0;
+		struct ice_rx_buf *rx_buf;
+		struct sk_buff *skb;
+		u16 stat_err_bits;
+		u16 vlan_tag = 0;
+		u8 rx_ptype;
+
+		if (cleaned_count >= ICE_RX_BUF_WRITE) {
+			failure |= ice_alloc_rx_bufs_zc(rx_ring,
+							cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
+		if (!ice_test_staterr(rx_desc, stat_err_bits))
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we have
+		 * verified the descriptor has been written back.
+		 */
+		dma_rmb();
+
+		size = le16_to_cpu(rx_desc->wb.pkt_len) &
+				   ICE_RX_FLX_DESC_PKT_LEN_M;
+		if (!size)
+			break;
+
+		rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
+		rx_buf->xdp->data_end = rx_buf->xdp->data + size;
+		xsk_buff_dma_sync_for_cpu(rx_buf->xdp, rx_ring->xsk_pool);
+
+		xdp_res = ice_run_xdp_zc(rx_ring, rx_buf->xdp);
+		if (xdp_res) {
+			if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))
+				xdp_xmit |= xdp_res;
+			else
+				xsk_buff_free(rx_buf->xdp);
+
+			rx_buf->xdp = NULL;
+			total_rx_bytes += size;
+			total_rx_packets++;
+			cleaned_count++;
+
+			ice_bump_ntc(rx_ring);
+			continue;
+		}
+
+		/* XDP_PASS path */
+		skb = ice_construct_skb_zc(rx_ring, rx_buf);
+		if (!skb) {
+			rx_ring->rx_stats.alloc_buf_failed++;
+			break;
+		}
+
+		cleaned_count++;
+		ice_bump_ntc(rx_ring);
+
+		if (eth_skb_pad(skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		total_rx_bytes += skb->len;
+		total_rx_packets++;
+
+		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
+		if (ice_test_staterr(rx_desc, stat_err_bits))
+			vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
+
+		rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+				       ICE_RX_FLEX_DESC_PTYPE_M;
+
+		ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+		ice_receive_skb(rx_ring, skb, vlan_tag);
+	}
+
+	ice_finalize_xdp_rx(rx_ring, xdp_xmit);
+	ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);
+
+	if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
+		if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
+			xsk_set_rx_need_wakeup(rx_ring->xsk_pool);
+		else
+			xsk_clear_rx_need_wakeup(rx_ring->xsk_pool);
+
+		return (int)total_rx_packets;
+	}
+
+	return failure ? budget : (int)total_rx_packets;
+}
+
+/**
+ * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries
+ * @xdp_ring: XDP Tx ring
+ * @budget: max number of frames to xmit
+ *
+ * Returns true if cleanup/transmission is done.
+ */
+static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget)
+{
+	struct ice_tx_desc *tx_desc = NULL;
+	bool work_done = true;
+	struct xdp_desc desc;
+	dma_addr_t dma;
+
+	while (likely(budget-- > 0)) {
+		struct ice_tx_buf *tx_buf;
+
+		if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) {
+			xdp_ring->tx_stats.tx_busy++;
+			work_done = false;
+			break;
+		}
+
+		tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use];
+
+		if (!xsk_tx_peek_desc(xdp_ring->xsk_pool, &desc))
+			break;
+
+		dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc.addr);
+		xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma,
+						 desc.len);
+
+		tx_buf->bytecount = desc.len;
+
+		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
+		tx_desc->buf_addr = cpu_to_le64(dma);
+		tx_desc->cmd_type_offset_bsz =
+			ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0, desc.len, 0);
+
+		xdp_ring->next_to_use++;
+		if (xdp_ring->next_to_use == xdp_ring->count)
+			xdp_ring->next_to_use = 0;
+	}
+
+	if (tx_desc) {
+		ice_xdp_ring_update_tail(xdp_ring);
+		xsk_tx_release(xdp_ring->xsk_pool);
+	}
+
+	return budget > 0 && work_done;
+}
+
+/**
+ * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
+ * @xdp_ring: XDP Tx ring
+ * @tx_buf: Tx buffer to clean
+ */
+static void
+ice_clean_xdp_tx_buf(struct ice_ring *xdp_ring, struct ice_tx_buf *tx_buf)
+{
+	xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
+	dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
+			 dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
+	dma_unmap_len_set(tx_buf, len, 0);
+}
+
+/**
+ * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries
+ * @xdp_ring: XDP Tx ring
+ * @budget: NAPI budget
+ *
+ * Returns true if cleanup/tranmission is done.
+ */
+bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget)
+{
+	int total_packets = 0, total_bytes = 0;
+	s16 ntc = xdp_ring->next_to_clean;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+	u32 xsk_frames = 0;
+	bool xmit_done;
+
+	tx_desc = ICE_TX_DESC(xdp_ring, ntc);
+	tx_buf = &xdp_ring->tx_buf[ntc];
+	ntc -= xdp_ring->count;
+
+	do {
+		if (!(tx_desc->cmd_type_offset_bsz &
+		      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+			break;
+
+		total_bytes += tx_buf->bytecount;
+		total_packets++;
+
+		if (tx_buf->raw_buf) {
+			ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+			tx_buf->raw_buf = NULL;
+		} else {
+			xsk_frames++;
+		}
+
+		tx_desc->cmd_type_offset_bsz = 0;
+		tx_buf++;
+		tx_desc++;
+		ntc++;
+
+		if (unlikely(!ntc)) {
+			ntc -= xdp_ring->count;
+			tx_buf = xdp_ring->tx_buf;
+			tx_desc = ICE_TX_DESC(xdp_ring, 0);
+		}
+
+		prefetch(tx_desc);
+
+	} while (likely(--budget));
+
+	ntc += xdp_ring->count;
+	xdp_ring->next_to_clean = ntc;
+
+	if (xsk_frames)
+		xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
+
+	if (xsk_uses_need_wakeup(xdp_ring->xsk_pool))
+		xsk_set_tx_need_wakeup(xdp_ring->xsk_pool);
+
+	ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes);
+	xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK);
+
+	return budget > 0 && xmit_done;
+}
+
+/**
+ * ice_xsk_wakeup - Implements ndo_xsk_wakeup
+ * @netdev: net_device
+ * @queue_id: queue to wake up
+ * @flags: ignored in our case, since we have Rx and Tx in the same NAPI
+ *
+ * Returns negative on error, zero otherwise.
+ */
+int
+ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
+	       u32 __always_unused flags)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_q_vector *q_vector;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_ring *ring;
+
+	if (test_bit(__ICE_DOWN, vsi->state))
+		return -ENETDOWN;
+
+	if (!ice_is_xdp_ena_vsi(vsi))
+		return -ENXIO;
+
+	if (queue_id >= vsi->num_txq)
+		return -ENXIO;
+
+	if (!vsi->xdp_rings[queue_id]->xsk_pool)
+		return -ENXIO;
+
+	ring = vsi->xdp_rings[queue_id];
+
+	/* The idea here is that if NAPI is running, mark a miss, so
+	 * it will run again. If not, trigger an interrupt and
+	 * schedule the NAPI from interrupt context. If NAPI would be
+	 * scheduled here, the interrupt affinity would not be
+	 * honored.
+	 */
+	q_vector = ring->q_vector;
+	if (!napi_if_scheduled_mark_missed(&q_vector->napi))
+		ice_trigger_sw_intr(&vsi->back->hw, q_vector);
+
+	return 0;
+}
+
+/**
+ * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP buff pool attached
+ * @vsi: VSI to be checked
+ *
+ * Returns true if any of the Rx rings has an AF_XDP buff pool attached
+ */
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
+{
+	int i;
+
+	if (!vsi->xsk_pools)
+		return false;
+
+	for (i = 0; i < vsi->num_xsk_pools; i++) {
+		if (vsi->xsk_pools[i])
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_xsk_clean_rx_ring - clean buffer pool queues connected to a given Rx ring
+ * @rx_ring: ring to be cleaned
+ */
+void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring)
+{
+	u16 i;
+
+	for (i = 0; i < rx_ring->count; i++) {
+		struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
+
+		if (!rx_buf->xdp)
+			continue;
+
+		rx_buf->xdp = NULL;
+	}
+}
+
+/**
+ * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its buffer pool queues
+ * @xdp_ring: XDP_Tx ring
+ */
+void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring)
+{
+	u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use;
+	u32 xsk_frames = 0;
+
+	while (ntc != ntu) {
+		struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];
+
+		if (tx_buf->raw_buf)
+			ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+		else
+			xsk_frames++;
+
+		tx_buf->raw_buf = NULL;
+
+		ntc++;
+		if (ntc >= xdp_ring->count)
+			ntc = 0;
+	}
+
+	if (xsk_frames)
+		xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h
new file mode 100644
index 000000000..fad783690
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_XSK_H_
+#define _ICE_XSK_H_
+#include "ice_txrx.h"
+#include "ice.h"
+
+struct ice_vsi;
+
+#ifdef CONFIG_XDP_SOCKETS
+int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool,
+		       u16 qid);
+int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget);
+bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget);
+int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags);
+bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count);
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
+void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring);
+void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring);
+#else
+static inline int
+ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi,
+		   struct xsk_buff_pool __always_unused *pool,
+		   u16 __always_unused qid)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_clean_rx_irq_zc(struct ice_ring __always_unused *rx_ring,
+		    int __always_unused budget)
+{
+	return 0;
+}
+
+static inline bool
+ice_clean_tx_irq_zc(struct ice_ring __always_unused *xdp_ring,
+		    int __always_unused budget)
+{
+	return false;
+}
+
+static inline bool
+ice_alloc_rx_bufs_zc(struct ice_ring __always_unused *rx_ring,
+		     u16 __always_unused count)
+{
+	return false;
+}
+
+static inline bool ice_xsk_any_rx_ring_ena(struct ice_vsi __always_unused *vsi)
+{
+	return false;
+}
+
+static inline int
+ice_xsk_wakeup(struct net_device __always_unused *netdev,
+	       u32 __always_unused queue_id, u32 __always_unused flags)
+{
+	return -EOPNOTSUPP;
+}
+
+#define ice_xsk_clean_rx_ring(rx_ring) do {} while (0)
+#define ice_xsk_clean_xdp_ring(xdp_ring) do {} while (0)
+#endif /* CONFIG_XDP_SOCKETS */
+#endif /* !_ICE_XSK_H_ */
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 10:05:51 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 10:05:51 +0000
commit	5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch)
tree	a94efe259b9009378be6d90eb30d2b019d95c194 /drivers/net/ethernet/intel/ice
parent	Initial commit. (diff)
download	linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.tar.xz linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.zip